FileNotFoundError: [Errno 2] No such file or directory: '/content/Vessels-Detection-10/train/00000742_jpg.rf.f14a935bb2e280f6b6e3474673251b7f.xml.xml' - xml-parsing

I try to define my class to load my custom dataset in pytorch.
import os
import xml.etree.ElementTree as ET
import torch
from import Dataset, DataLoader
from torchvision import transforms
import cv2
class CustomDataset(Dataset):
def __init__(self, root_dir, transform=None):
self.root_dir = root_dir
self.transform = transform
self.images = []
self.labels = []
# Load the images and labels into the list
for filename in os.listdir(root_dir):
image_path = os.path.join(root_dir, filename + '.jpg')
label_path = os.path.join(root_dir, filename + '.xml')
image = cv2.imread(image_path)
tree = ET.parse(label_path)
root = tree.getroot()
objects = root.findall('object')
labels = []
for obj in objects:
bndbox = obj.find('bndbox')
xmin = int(bndbox.find('xmin').text)
ymin = int(bndbox.find('ymin').text)
xmax = int(bndbox.find('xmax').text)
ymax = int(bndbox.find('ymax').text)
labels.append([xmin, ymin, xmax, ymax])
def __len__(self):
return len(self.images)
def __getitem__(self, idx):
image = self.images[idx]
label = self.labels[idx]
if self.transform:
image = self.transform(image)
return image, label
But I get the following error:
FileNotFoundError Traceback (most recent call last)
54 # Create the dataset
---> 55 train_dataset = CustomDataset(root_dir='/content/Vessels-Detection-10/train', transform=transform)
56 test_dataset = CustomDataset(root_dir='/content/Vessels-Detection-10/test', transform=transform)
57 val_dataset = CustomDataset(root_dir='/content/Vessels-Detection-10/valid', transform=transform)
2 frames
/usr/lib/python3.8/xml/etree/ in parse(self, source, parser)
582 close_source = False
583 if not hasattr(source, "read"):
--> 584 source = open(source, "rb")
585 close_source = True
586 try:
FileNotFoundError: [Errno 2] No such file or directory: '/content/Vessels-Detection-10/train/00000742_jpg.rf.f14a935bb2e280f6b6e3474673251b7f.xml.xml'
I tried to remove the .xml extension from the label_path:
label_path = os.path.join(root_dir, filename)
But in vain:
Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/IPython/core/", line 3326, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "", line 55, in
train_dataset = CustomDataset(root_dir='/content/Vessels-Detection-10/train', transform=transform)
File "", line 22, in init
tree = ET.parse(label_path)
File "/usr/lib/python3.8/xml/etree/", line 1202, in parse
tree.parse(source, parser)
File "/usr/lib/python3.8/xml/etree/", line 595, in parse
self._root = parser._parse_whole(source)
File "", line unknown
ParseError: not well-formed (invalid token): line 1, column 0`

I think I had a similar problem some times ago and I remember I had to add a dot at the start of the path for it to work.
So if your path is "/content/etc..."
Make it "./content/etc..."


Resource Exhausted Error while Creating Image captioning model

I have used pre_trained vgg16 for cnn_part to get features of image (which I am not training) and defining the decoder class, which is trained through model. I don't know how resources are getting exhausted in just training decoder part, which I think is not too complex as vgg16. Here I am attaching all the relevant code .
Here is code for vgg16 -->
image_model = tf.keras.applications.VGG16(include_top=False,weights='imagenet' )
image_model.trainable = False
new_input = image_model.input # Any arbitrary shapes with 3 channels
hidden_layer = image_model.layers[-1].output
image_features_extract_model = tf.keras.Model(new_input, hidden_layer)
class VGG16_Encoder(tf.keras.Model):
# This encoder passes the features through a Fully connected layer
def __init__(self , cnn_model ):
super(VGG16_Encoder, self).__init__()
# shape after fc : (batch_size, 49, embedding_dim)
self.conv_base = cnn_model
#self.fc = tf.keras.layers.Dense(embedding_dim)
#self.dropout = tf.keras.layers.Dropout(0.5, noise_shape=None, seed=None)
def call(self, x):
#x = self.fc(x)
#x = tf.nn.relu(x)
x = self.conv_base(x)
x = tf.reshape(x , (BATCH_SIZE, 49 , 512))
return x
Here is the code of decoder --->
def rnn_type(units):
# If you have a GPU, we recommend using CuDNNGRU(provides a 3x speedup than GRU)
# the code automatically does that.
if tf.test.is_gpu_available():
return tf.compat.v1.keras.layers.CuDNNGRU(units,
return tf.keras.layers.GRU(units,
'''The encoder_output(i.e. 'features'), hidden_state(initialized to 0)(i.e. 'hidden') and
the decoder_input (which is the start token)(i.e. 'x') is passed to the decoder.'''
class Rnn_Local_Decoder(tf.keras.Model):
def __init__(self, embedding_dim, units, vocab_size):
self.units = units
self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
self.gru = tf.keras.layers.GRU(self.units,
self.fc1 = tf.keras.layers.Dense(self.units)
self.dropout = tf.keras.layers.Dropout(0.5, noise_shape=None, seed=None)
self.batchnormalization = tf.keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True, beta_initializer='zeros', gamma_initializer='ones', moving_mean_initializer='zeros', moving_variance_initializer='ones', beta_regularizer=None, gamma_regularizer=None, beta_constraint=None, gamma_constraint=None)
self.fc2 = tf.keras.layers.Dense(vocab_size)
# Implementing Attention Mechanism
self.U_attn = tf.keras.layers.Dense(units)
self.W_attn = tf.keras.layers.Dense(units)
self.V_attn = tf.keras.layers.Dense(1)
def call(self, x, features, hidden):
# features : (batch_size,49,512) (Output from ENCODER)
# hidden : (batch_size, hidden_size) <==> (64,512)
# hidden_with_time_axis : (batch_size, 1, hidden_size) <==> (64,1,512)
hidden_with_time_axis = tf.expand_dims(hidden, 1)
# score shape : (64, 49, 1)
# Attention Function
'''e_ij = f( s_(t-1) , h_j )
e_ij = V_attn(T)*tanh(U_attn * h_j + W_attn * s_t )'''
score = self.V_attn(tf.nn.tanh(self.U_attn(features) + self.W_attn(hidden_with_time_axis)))
# self.Uattn(features) : (64,49,512)
# self.Wattn(hidden_with_time_axis) : (64,1,512)
# tf.nn.tanh(self.Uattn(features) + self.Wattn(hidden_with_time_axis)) : (64,49,512)
# self.Vattn(tf.nn.tanh(self.Uattn(features) + self.Wattn(hidden_with_time_axis))) : (64,49,1) ==> score
# you get 1 at the last axis because you are applying score to self.Vattn
# Then find Probability using Softmax
'''attention_weights(alpha_ij) = softmax(e_ij)'''
attention_weights = tf.nn.softmax(score, axis=1)
# attention_weights : (64, 49, 1)
# Give weights to the different pixels in the image
''' C(t) = Summation(j=1 to T) (attention_weights * VGG-16 features) '''
context_vector = attention_weights * features
context_vector = tf.reduce_sum(context_vector, axis=1)
# Context Vector(64,256) = AttentionWeights(64,49,1) * features(64,49,256)
# context_vector shape after sum : (64, 256) ---> doing ele_wise sum of features_vec (axis=1)
# x shape after passing through embedding : (64, 1, 256)
x = self.embedding(x)
# x shape after concatenation : (64, 1, 512)
x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)
# passing the concatenated vector to the GRU
output, state = self.gru(x)
# shape == (batch_size, max_length, hidden_size)
x = self.fc1(output)
# x : (batch_size * max_length, hidden_size)
x = tf.reshape(x, (-1, x.shape[2]))
# Adding Dropout and BatchNorm Layers
x= self.dropout(x)
x= self.batchnormalization(x)
# output : (64 * 512)
x = self.fc2(x)
# shape : (64 * 8329(vocab))
return x, state, attention_weights
def reset_state(self, batch_size):
return tf.zeros((batch_size, self.units)) recurrent_initializer='glorot_uniform')
encoder = VGG16_Encoder(image_features_extract_model)
decoder = Rnn_Local_Decoder(embedding_dim, units, vocab_size)
Here is the training code --->
def train_step(img_tensor, target):
loss = 0
# initializing the hidden state for each batch
# because the captions are not related from image to image
hidden = decoder.reset_state(batch_size=target.shape[0])
dec_input = tf.expand_dims([tokenizer.word_index['<start>']] * BATCH_SIZE, 1)
features = encoder(img_tensor)
with tf.GradientTape() as tape:
for i in range(1, max_len):
# passing the features through the decoder
predictions, hidden, _ = decoder(dec_input, features, hidden)
loss += loss_function(target[:, i], predictions)
# using teacher forcing
dec_input = tf.expand_dims(target[:, i], 1)
total_loss = (loss / int(target.shape[1]))
trainable_variables = decoder.trainable_variables
gradients = tape.gradient(loss, trainable_variables)
optimizer.apply_gradients(zip(gradients, trainable_variables))
return loss, total_loss
Here is the error --->
ResourceExhaustedError: Graph execution error:
Detected at node 'gradient_tape/rnn__local__decoder_1/dense_6/MatMul_3/MatMul_1' defined at (most recent call last):
File "/usr/lib/python3.8/", line 194, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/usr/lib/python3.8/", line 87, in _run_code
exec(code, run_globals)
File "/usr/local/lib/python3.8/dist-packages/", line 16, in <module>
File "/usr/local/lib/python3.8/dist-packages/traitlets/config/", line 992, in launch_instance
File "/usr/local/lib/python3.8/dist-packages/ipykernel/", line 612, in start
File "/usr/local/lib/python3.8/dist-packages/tornado/platform/", line 149, in start
File "/usr/lib/python3.8/asyncio/", line 570, in run_forever
File "/usr/lib/python3.8/asyncio/", line 1859, in _run_once
File "/usr/lib/python3.8/asyncio/", line 81, in _run, *self._args)
File "/usr/local/lib/python3.8/dist-packages/tornado/", line 690, in <lambda>
lambda f: self._run_callback(functools.partial(callback, future))
File "/usr/local/lib/python3.8/dist-packages/tornado/", line 743, in _run_callback
ret = callback()
File "/usr/local/lib/python3.8/dist-packages/tornado/", line 787, in inner
File "/usr/local/lib/python3.8/dist-packages/tornado/", line 748, in run
yielded = self.gen.send(value)
File "/usr/local/lib/python3.8/dist-packages/ipykernel/", line 381, in dispatch_queue
yield self.process_one()
File "/usr/local/lib/python3.8/dist-packages/tornado/", line 225, in wrapper
runner = Runner(result, future, yielded)
File "/usr/local/lib/python3.8/dist-packages/tornado/", line 714, in __init__
File "/usr/local/lib/python3.8/dist-packages/tornado/", line 748, in run
yielded = self.gen.send(value)
File "/usr/local/lib/python3.8/dist-packages/ipykernel/", line 365, in process_one
yield gen.maybe_future(dispatch(*args))
File "/usr/local/lib/python3.8/dist-packages/tornado/", line 209, in wrapper
yielded = next(result)
File "/usr/local/lib/python3.8/dist-packages/ipykernel/", line 268, in dispatch_shell
yield gen.maybe_future(handler(stream, idents, msg))
File "/usr/local/lib/python3.8/dist-packages/tornado/", line 209, in wrapper
yielded = next(result)
File "/usr/local/lib/python3.8/dist-packages/ipykernel/", line 543, in execute_request
File "/usr/local/lib/python3.8/dist-packages/tornado/", line 209, in wrapper
yielded = next(result)
File "/usr/local/lib/python3.8/dist-packages/ipykernel/", line 306, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "/usr/local/lib/python3.8/dist-packages/ipykernel/", line 536, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/IPython/core/", line 2854, in run_cell
result = self._run_cell(
File "/usr/local/lib/python3.8/dist-packages/IPython/core/", line 2881, in _run_cell
return runner(coro)
File "/usr/local/lib/python3.8/dist-packages/IPython/core/", line 68, in _pseudo_sync_runner
File "/usr/local/lib/python3.8/dist-packages/IPython/core/", line 3057, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
File "/usr/local/lib/python3.8/dist-packages/IPython/core/", line 3249, in run_ast_nodes
if (await self.run_code(code, result, async_=asy)):
File "/usr/local/lib/python3.8/dist-packages/IPython/core/", line 3326, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-82-94347d84883d>", line 11, in <module>
batch_loss, t_loss = train_step(img_tensor, target)
File "<ipython-input-63-9f15c0ea6d9d>", line 30, in train_step
gradients = tape.gradient(loss, trainable_variables)
Node: 'gradient_tape/rnn__local__decoder_1/dense_6/MatMul_3/MatMul_1'
Sorry for uploading so much of code , but I feel that all is necessary to sort this issue.
Thanks in advance !!!
I tried to reduce the data from 40000 images to just 500 images , but then also same error stayed. I even tried to reduce batch size, embedding dim of decoder (512-->128) but all in vain.
Kindly help me fix this issue.

Create dataset from a folder of images and a txt file with labels

I have two folders containing train and test images. And I have two files train.txt and test.txt containing the labels. I want to create a dataset for a PyTorch setting. I mean, I want to use DataLoader and I want the tuples (x_train, y_train) and (x_test, y_test). How can I do? Basically I think that in tensorflow I can use flow_from_dataframe, but in PyTorch?
As an example, in this image I show my pandas dataframe containing filenames and labels:
I am trying with a custom dataset class:
from import Dataset, DataLoader
import os
import pandas as pd
from import read_image
class CoViDxDataset(Dataset):
def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
self.img_labels = annotations_file
self.img_dir = img_dir
self.transform = transform
self.target_transform = target_transform
def __len__(self):
return len(self.img_labels)
def __getitem__(self, idx):
img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
image = read_image(img_path)
label = self.img_labels.iloc[idx, 1]
if self.transform:
image = self.transform(image)
if self.target_transform:
label = self.target_transform(label)
return image, label
train_dataset = CoViDxDataset(train_df, train_path)
When I do train_dataset[0] I have this error:
RuntimeError Traceback (most recent call last)
Input In [84], in <cell line: 1>()
----> 1 train_dataset[0]
Input In [82], in CoViDxDataset.__getitem__(self, idx)
17 img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
18 print(img_path)
---> 19 image = read_image(img_path)
20 label = self.img_labels.iloc[idx, 1]
21 if self.transform:
File ~/anaconda3/envs/openfl/lib/python3.8/site-packages/torchvision/io/, in read_image(path, mode)
243 if not torch.jit.is_scripting() and not torch.jit.is_tracing():
244 _log_api_usage_once(read_image)
--> 245 data = read_file(path)
246 return decode_image(data, mode)
File ~/anaconda3/envs/openfl/lib/python3.8/site-packages/torchvision/io/, in read_file(path)
45 if not torch.jit.is_scripting() and not torch.jit.is_tracing():
46 _log_api_usage_once(read_file)
---> 47 data = torch.ops.image.read_file(path)
48 return data
RuntimeError: Expected a non empty file
where train_path = 'train/' and train_df is the image below:

Dask distributed LocalCluster fails with "TypeError: can't pickle _thread._local objects" when using to hdf5 file

I'm running on one machine with 16 cores and 64GB RAM and want to use dask with LocalCluster, since need the profiling tool for optimization.
I set up the LocalCluster as explained here. Still it gives me the following error:
Traceback (most recent call last):
File "/data/myusername/anaconda3/lib/python3.7/site-packages/distributed/protocol/", line 38, in dumps
result = pickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL)
TypeError: can't pickle _thread._local objects
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/myusername/remote_code/trials/", line 61, in <module>
File "/home/myusername/remote_code/trials/", line 55, in create_matrix, d_set, dtype="float32")
File "/data/myusername/anaconda3/lib/python3.7/site-packages/dask/array/", line 916, in store
File "/data/myusername/anaconda3/lib/python3.7/site-packages/dask/", line 175, in compute
(result,) = compute(self, traverse=False, **kwargs)
File "/data/myusername/anaconda3/lib/python3.7/site-packages/dask/", line 446, in compute
results = schedule(dsk, keys, **kwargs)
File "/data/myusername/anaconda3/lib/python3.7/site-packages/distributed/", line 2499, in get
File "/data/myusername/anaconda3/lib/python3.7/site-packages/distributed/", line 2426, in _graph_to_futures
"tasks": valmap(dumps_task, dsk3),
File "cytoolz/dicttoolz.pyx", line 179, in cytoolz.dicttoolz.valmap
File "cytoolz/dicttoolz.pyx", line 204, in cytoolz.dicttoolz.valmap
File "/data/myusername/anaconda3/lib/python3.7/site-packages/distributed/", line 3186, in dumps_task
return {"function": dumps_function(task[0]), "args": warn_dumps(task[1:])}
File "/data/myusername/anaconda3/lib/python3.7/site-packages/distributed/", line 3195, in warn_dumps
b = dumps(obj)
File "/data/myusername/anaconda3/lib/python3.7/site-packages/distributed/protocol/", line 51, in dumps
return cloudpickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL)
File "/data/myusername/anaconda3/lib/python3.7/site-packages/cloudpickle/", line 1108, in dumps
File "/data/myusername/anaconda3/lib/python3.7/site-packages/cloudpickle/", line 473, in dump
return Pickler.dump(self, obj)
File "/data/myusername/anaconda3/lib/python3.7/", line 437, in dump
File "/data/myusername/anaconda3/lib/python3.7/", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "/data/myusername/anaconda3/lib/python3.7/", line 786, in save_tuple
File "/data/myusername/anaconda3/lib/python3.7/", line 549, in save
self.save_reduce(obj=obj, *rv)
File "/data/myusername/anaconda3/lib/python3.7/", line 662, in save_reduce
File "/data/myusername/anaconda3/lib/python3.7/", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "/data/myusername/anaconda3/lib/python3.7/", line 856, in save_dict
File "/data/myusername/anaconda3/lib/python3.7/", line 882, in _batch_setitems
File "/data/myusername/anaconda3/lib/python3.7/", line 524, in save
rv = reduce(self.proto)
TypeError: can't pickle _thread._local objects
I use the latest versions of all AFAIK needed versions:
python 3.7.3 with anaconda3 on ubuntu 18.04 LTS
dask: 2.3.0
distributed: 2.3.0
bokeh: 1.3.4
cytoolz: 0.10.0
h5py: 2.9.0
Here is the minimal reproducible example:
import os
import dask.array as da
import h5py
import numpy as np
from dask.distributed import Client
MY_USER_NAME = "myusername"
EARTH_RADIUS = 6372.795
N = 20000
def create_matrix():
lat_vec = np.random.random(N) * 90
lon_vec = np.random.random(N) * 180
lat_vec = np.radians(lat_vec)
lon_vec = np.radians(lon_vec)
sin_lat_vec = np.sin(lat_vec)
cos_lat_vec = np.cos(lat_vec)
def _blocked_calculate_great_circle_distance(block, block_info=None):
loc = block_info[0]['array-location']
(row_start, row_stop) = loc[0]
(col_start, col_stop) = loc[1]
# see
# and
row_lon = lon_vec[row_start:row_stop]
col_lon = lon_vec[col_start:col_stop]
delta_lon = row_lon[:, np.newaxis] - col_lon
cos_delta_lon = np.cos(delta_lon)
central_angle = np.arccos(sin_lat_vec[row_start:row_stop, np.newaxis] * sin_lat_vec[col_start:col_stop] +
cos_lat_vec[row_start:row_stop, np.newaxis] * cos_lat_vec[col_start:col_stop]
* cos_delta_lon)
return EARTH_RADIUS * central_angle
dir_path = "/home/" + MY_USER_NAME + "/minimum_reproducible_example/"
if not os.path.exists(dir_path):
file_path = os.path.join(dir_path, "matrix.hdf5")
if os.path.exists(file_path):
with h5py.File(file_path) as f:
d_set = f.create_dataset('/data', shape=(N, N), dtype='f4', fillvalue=0)
w = da.from_array(d_set, chunks=(CHUNK_SIZE, CHUNK_SIZE))
w = w.map_blocks(_blocked_calculate_great_circle_distance, chunks=(CHUNK_SIZE, CHUNK_SIZE), dtype='f4'), d_set, dtype="float32")
if __name__ == '__main__':
client = Client(processes=False)
Can anybody help me with this?

Neural Network Dense Layer Error in Shape attribute

I have created a feed forward neural network but but it is giving a Type Error despite changing the datatype of the parameter. I am really new to keras and Machine Learning so I would appreciate as detailed help as possible. I am attaching the code snippet and the error log below. CODE-
num_of_features = X_train.shape[1]
nb_classes = Y_train.shape[1]
def baseline_model():
def branch2(x):
x = Dense(np.floor(num_of_features*50), activation='sigmoid')(x)
x = Dropout(0.75)(x)
x = Dense(np.floor(num_of_features*20), activation='sigmoid')(x)
x = Dropout(0.5)(x)
x = Dense(np.floor(num_of_features), activation='sigmoid')(x)
x = Dropout(0.1)(x)
return x
main_input = Input(shape=(num_of_features,), name='main_input')
x = main_input
x = branch2(x)
main_output = Dense(nb_classes, activation='softmax')(x)
model = Model(input=main_input, output=main_output)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy', 'categorical_crossentropy'])
return model
model = baseline_model()
Traceback (most recent call last):
File "", line 143, in <module>
model = baseline_model()
File "", line 137, in baseline_model
x = branch2(x)
File "", line 124, in branch2
x = Dense(np.floor(num_of_features*50), activation='sigmoid')(x)
File "/home/shashank/tensorflow/lib/python3.6/site-packages/keras/engine/", line 432, in __call__[0])
File "/home/shashank/tensorflow/lib/python3.6/site-packages/keras/layers/", line 872, in build
File "/home/shashank/tensorflow/lib/python3.6/site-packages/keras/legacy/", line 91, in wrapper
return func(*args, **kwargs)
File "/home/shashank/tensorflow/lib/python3.6/site-packages/keras/engine/", line 249, in add_weight
weight = K.variable(initializer(shape),
File "/home/shashank/tensorflow/lib/python3.6/site-packages/keras/", line 218, in __call__
dtype=dtype, seed=self.seed)
File "/home/shashank/tensorflow/lib/python3.6/site-packages/keras/backend/", line 4077, in random_uniform
dtype=dtype, seed=seed)
File "/home/shashank/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/", line 242, in random_uniform
rnd = gen_random_ops.random_uniform(shape, dtype, seed=seed1, seed2=seed2)
File "/home/shashank/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/", line 674, in random_uniform
File "/home/shashank/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/", line 609, in _apply_op_helper
File "/home/shashank/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/", line 60, in _SatisfiesTypeConstraint
", ".join(dtypes.as_dtype(x).name for x in allowed_list)))
TypeError: Value passed to parameter 'shape' has DataType float32 not in list of allowed values: int32, int64
Why are you using np.floor for the shape in your Dense layers? This will produce a float, you need an int there. Removing np.floor should solve your problem.

InvalidArgumentError: You must feed a value for placeholder tensor 'Placeholder' with dtype float and shape [1000,625]

I get the above unexpected error when trying to run this code:
# -*- coding: utf-8 -*-
Created on Fri Jun 24 10:38:04 2016
#author: andrea
# pylint: disable=missing-docstring
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
from pylab import *
import argparse
import mlp
# Basic model parameters as external flags. = tf.python.platform.flags._FlagValues() = argparse.ArgumentParser()
flags =
flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.')
flags.DEFINE_integer('max_steps', 20, 'Number of steps to run trainer.')
flags.DEFINE_integer('batch_size', 1000, 'Batch size. Must divide evenly into the dataset sizes.')
flags.DEFINE_integer('num_samples', 100000, 'Total number of samples. Needed by the reader')
flags.DEFINE_string('training_set_file', 'godzilla_dataset_size625', 'Training set file')
flags.DEFINE_string('test_set_file', 'godzilla_testset_size625', 'Test set file')
flags.DEFINE_string('test_size', 1000, 'Test set size')
def placeholder_inputs(batch_size):
images_placeholder = tf.placeholder(tf.float32, shape=(batch_size, mlp.NUM_INPUT))
labels_placeholder = tf.placeholder(tf.float32, shape=(batch_size, mlp.NUM_OUTPUT))
return images_placeholder, labels_placeholder
def fill_feed_dict(data_set_file, images_pl, labels_pl):
for l in range(int(FLAGS.num_samples/FLAGS.batch_size)):
data_set = genfromtxt("../dataset/" + data_set_file, skip_header=l*FLAGS.batch_size, max_rows=FLAGS.batch_size)
data_set = reshape(data_set, [FLAGS.batch_size, mlp.NUM_INPUT + mlp.NUM_OUTPUT])
images = data_set[:, :mlp.NUM_INPUT]
labels_feed = reshape(data_set[:, mlp.NUM_INPUT:], [FLAGS.batch_size, mlp.NUM_OUTPUT])
images_feed = reshape(images, [FLAGS.batch_size, mlp.NUM_INPUT])
feed_dict = {
images_pl: images_feed,
labels_pl: labels_feed,
yield feed_dict
def reader(data_set_file, images_pl, labels_pl):
data_set = loadtxt("../dataset/" + data_set_file)
images = data_set[:, :mlp.NUM_INPUT]
labels_feed = reshape(data_set[:, mlp.NUM_INPUT:], [data_set.shape[0], mlp.NUM_OUTPUT])
images_feed = reshape(images, [data_set.shape[0], mlp.NUM_INPUT])
feed_dict = {
images_pl: images_feed,
labels_pl: labels_feed,
return feed_dict, labels_pl
def run_training():
tot_training_loss = []
tot_test_loss = []
with tf.Graph().as_default() as g:
images_placeholder, labels_placeholder = placeholder_inputs(FLAGS.batch_size)
test_images_pl, test_labels_pl = placeholder_inputs(FLAGS.test_size)
logits = mlp.inference(images_placeholder)
test_pred = mlp.inference(test_images_pl, reuse=True)
loss = mlp.loss(logits, labels_placeholder)
test_loss = mlp.loss(test_pred, test_labels_pl)
train_op =, FLAGS.learning_rate)
#summary_op = tf.merge_all_summaries()
init = tf.initialize_all_variables()
saver = tf.train.Saver()
sess = tf.Session()
#summary_writer = tf.train.SummaryWriter("./", sess.graph)
test_feed, test_labels_placeholder = reader(FLAGS.test_set_file, test_images_pl, test_labels_pl)
# Start the training loop.
for step in xrange(FLAGS.max_steps):
start_time = time.time()
feed_gen = fill_feed_dict(FLAGS.training_set_file, images_placeholder, labels_placeholder)
for feed_dict in feed_gen:
_, loss_value =[train_op, loss], feed_dict=feed_dict)
_, test_loss_val =[test_pred, test_loss], feed_dict=test_feed)
#if i % 10 == 0:
#print('%d minibatches analyzed...'%i)
if step % 1 == 0:
duration = time.time() - start_time
print('Epoch %d (%.3f sec):\n training loss = %f \n test loss = %f ' % (step, duration, loss_value, test_loss_val))
predictions =, feed_dict=test_feed)
savetxt("predictions", predictions)
savetxt("training_loss", tot_training_loss)
savetxt("test_loss", tot_test_loss)
scatter(test_feed[test_labels_placeholder], predictions)
#plot([.4, .6], [.4, .6])
#if __name__ == '__main__':
this is mlp:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import tensorflow as tf
def inference(images, reuse=None):
with tf.variable_scope('hidden1', reuse=reuse):
weights = tf.get_variable(name='weights', shape=[NUM_INPUT, NUM_HIDDEN], initializer=tf.contrib.layers.xavier_initializer())
weight_decay = tf.mul(tf.nn.l2_loss(weights), 0.00001, name='weight_loss')
tf.add_to_collection('losses', weight_decay)
biases = tf.Variable(tf.constant(0.0, name='biases', shape=[NUM_HIDDEN]))
hidden1_output = tf.nn.relu(tf.matmul(images, weights)+biases, name='hidden1')
with tf.variable_scope('output', reuse=reuse):
weights = tf.get_variable(name='weights', shape=[NUM_HIDDEN, NUM_OUTPUT], initializer=tf.contrib.layers.xavier_initializer())
weight_decay = tf.mul(tf.nn.l2_loss(weights), 0.00001, name='weight_loss')
tf.add_to_collection('losses', weight_decay)
biases = tf.Variable(tf.constant(0.0, name='biases', shape=[NUM_OUTPUT]))
output = tf.nn.relu(tf.matmul(hidden1_output, weights)+biases, name='output')
return output
def loss(outputs, labels):
rmse = tf.sqrt(tf.reduce_mean(tf.square(tf.sub(labels, outputs))), name="rmse")
tf.add_to_collection('losses', rmse)
return tf.add_n(tf.get_collection('losses'), name='total_loss')
def training(loss, learning_rate):
tf.scalar_summary(, loss)
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
global_step = tf.Variable(0, name='global_step', trainable=False)
train_op = optimizer.minimize(loss, global_step=global_step)
return train_op
here the error:
Traceback (most recent call last):
File "<ipython-input-1-f16dfed3b99b>", line 1, in <module>
runfile('/home/andrea/test/python/', wdir='/home/andrea/test/python')
File "/usr/local/lib/python2.7/dist-packages/spyderlib/widgets/externalshell/", line 714, in runfile
execfile(filename, namespace)
File "/usr/local/lib/python2.7/dist-packages/spyderlib/widgets/externalshell/", line 81, in execfile
builtins.execfile(filename, *where)
File "/home/andrea/test/python/", line 127, in <module>
File "/home/andrea/test/python/", line 105, in run_training
_, test_loss_val =[test_pred, test_loss], feed_dict=test_feed)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/", line 372, in run
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/", line 636, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/", line 708, in _do_run
target_list, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/", line 728, in _do_call
raise type(e)(node_def, op, message)
InvalidArgumentError: You must feed a value for placeholder tensor 'Placeholder' with dtype float and shape [1000,625]
[[Node: Placeholder = Placeholder[dtype=DT_FLOAT, shape=[1000,625], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
Caused by op u'Placeholder', defined at:
File "/usr/local/lib/python2.7/dist-packages/spyderlib/widgets/externalshell/", line 205, in <module>
File "/usr/local/lib/python2.7/dist-packages/ipykernel/", line 442, in start
File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/", line 162, in start
super(ZMQIOLoop, self).start()
File "/usr/local/lib/python2.7/dist-packages/tornado/", line 883, in start
handler_func(fd_obj, events)
File "/usr/local/lib/python2.7/dist-packages/tornado/", line 275, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/", line 440, in _handle_events
File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/", line 472, in _handle_recv
self._run_callback(callback, msg)
File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/", line 414, in _run_callback
callback(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/tornado/", line 275, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/ipykernel/", line 276, in dispatcher
return self.dispatch_shell(stream, msg)
File "/usr/local/lib/python2.7/dist-packages/ipykernel/", line 228, in dispatch_shell
handler(stream, idents, msg)
File "/usr/local/lib/python2.7/dist-packages/ipykernel/", line 391, in execute_request
user_expressions, allow_stdin)
File "/usr/local/lib/python2.7/dist-packages/ipykernel/", line 199, in do_execute
shell.run_cell(code, store_history=store_history, silent=silent)
File "/usr/local/lib/python2.7/dist-packages/IPython/core/", line 2723, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/usr/local/lib/python2.7/dist-packages/IPython/core/", line 2831, in run_ast_nodes
if self.run_code(code, result):
File "/usr/local/lib/python2.7/dist-packages/IPython/core/", line 2885, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-1-f16dfed3b99b>", line 1, in <module>
runfile('/home/andrea/test/python/', wdir='/home/andrea/test/python')
File "/usr/local/lib/python2.7/dist-packages/spyderlib/widgets/externalshell/", line 714, in runfile
execfile(filename, namespace)
File "/usr/local/lib/python2.7/dist-packages/spyderlib/widgets/externalshell/", line 81, in execfile
builtins.execfile(filename, *where)
File "/home/andrea/test/python/", line 127, in <module>
File "/home/andrea/test/python/", line 79, in run_training
images_placeholder, labels_placeholder = placeholder_inputs(FLAGS.batch_size)
File "/home/andrea/test/python/", line 37, in placeholder_inputs
images_placeholder = tf.placeholder(tf.float32, shape=(batch_size, mlp.NUM_INPUT))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/", line 895, in placeholder
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/", line 1238, in _placeholder
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/", line 704, in apply_op
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/", line 2260, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/", line 1230, in __init__
self._traceback = _extract_stack()
I really don't understand why. It looks to me that I'm feeding all the placeholders before using them. I also removed the "merge_all_summaries" since this problem is similar to other (this and this), but it didn't help
EDIT: training data: 100000 samples x 625 features
test data: 1000 samples x 625 features
num. output: 1
I think the problem is in this code:
def loss(outputs, labels):
rmse = tf.sqrt(tf.reduce_mean(tf.square(tf.sub(labels, outputs))), name="rmse")
tf.add_to_collection('losses', rmse)
return tf.add_n(tf.get_collection('losses'), name='total_loss')
You're adding up all the losses from collection 'losses', including both your training and test losses. In particular, in this code:
loss = mlp.loss(logits, labels_placeholder)
test_loss = mlp.loss(test_pred, test_labels_pl)
The first call to mlp.loss will add training losses to the 'losses' collection. The second call to mlp.loss will incorporate those values in its result. So when you try to compute the test_loss, Tensorflow complains that you didn't feed all of the inputs (the training placeholders).
Perhaps you meant something like this?
def loss(outputs, labels):
rmse = tf.sqrt(tf.reduce_mean(tf.square(tf.sub(labels, outputs))), name="rmse")
return rmse
I hope that helps!
