I want to do a ensemble of resnet50 and desnsenet121, but got an error:
Graph disconnected: cannot obtain value for tensor Tensor("input_8:0", shape=(?, 224, 224, 3), dtype=float32) at layer "input_8". The following previous layers were accessed without issue: []
Below is my code for ensembling:
from keras import applications
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from keras.models import Model, Input
#from keras.engine.topology import Input
from keras.layers import Average
def resnet50():
base_model = applications.resnet50.ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
last = base_model.output
x = Flatten()(last)
x = Dense(2000, activation='relu')(x)
preds = Dense(200, activation='softmax')(x)
model = Model(base_model.input, preds)
return model
def densenet121():
base_model = applications.densenet.DenseNet121(weights='imagenet', include_top=False, input_shape=(224,224, 3))
last = base_model.output
x = Flatten()(last)
x = Dense(2000, activation='relu')(x)
preds = Dense(200, activation='softmax')(x)
model = Model(base_model.input, preds)
return model
resnet50_model = resnet50()
densenet121_model = densenet121()
ensembled_models = [resnet50_model,densenet121_model]
def ensemble(models,model_input):
outputs = [model.outputs[0] for model in models]
y = Average()(outputs)
model = Model(model_input,y,name='ensemble')
return model
model_input = Input(shape=(224,224,3))
ensemble_model = ensemble(ensembled_models,model_input)
I thought the reason is when I combine reset50 and densenet121, they have their own input layer, even though I make the input shape to be the same. Different input layer leads to conflict. That's just my guess and I am not sure how to fix it
You can set input_tensor=model_input when creating the base models.
def resnet50(model_input):
base_model = applications.resnet50.ResNet50(weights='imagenet', include_top=False, input_tensor=model_input)
# ...
def densenet121(model_input):
base_model = applications.densenet.DenseNet121(weights='imagenet', include_top=False, input_tensor=model_input)
# ...
model_input = Input(shape=(224, 224, 3))
resnet50_model = resnet50(model_input)
densenet121_model = densenet121(model_input)
The base models will then use the provided model_input tensor instead of creating separate input tensors of their own.
Related
I created my own custom toy dataset of graphs in order to learn graph neural networks in Pytorch-geopmetric (PyG).
The data looks like the following:
Data(x=[20, 1], edge_index=[2, 20], y=[1])
I also created a dataloader as follows:
from torch_geometric.loader import DataLoader
train_dataloader = DataLoader(dataset[0:8000], batch_size=32, shuffle=True)
test_dataloader = DataLoader(dataset[8000:10000], batch_size=32, shuffle=True)
Therefore, a batch will look like:
DataBatch(x=[640, 1], edge_index=[2, 640], y=[32], batch=[640], ptr=[33])
My attempt to make a Graph-CNN:
import torch
from torch import nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
class GCN(torch.nn.Module):
def __init__(self):
super().__init__()
self.conv1 = GCNConv(dataset[0].num_node_features, 16)
self.conv2 = GCNConv(16, 16)
self.out = nn.Linear(16, 1)
def forward(self, data):
x, edge_index = data.x, data.edge_index
x = self.conv1(x, edge_index)
x = F.relu(x)
x = F.dropout(x, training=self.training)
x = self.conv2(x, edge_index)
out = self.out(x)
return out
model = GCN()
When I do something like:
criterion = torch.nn.CrossEntropyLoss()
target = batch.y.to(torch.float32)
loss = criterion(out, target)
loss
I get the error:
ValueError: Expected input batch_size (640) to match target batch_size (32).
Full code is in my github repo here:
https://github.com/amine179/myGNN-learning/blob/main/My%20first%20GCNN.ipynb
I am new to Transfer learning i am working on image classification of 2 categories. i am using InceptionV3, for classification of these images. My data
is in .jpg format. and folder structure is in below format.Since i have 2 categories i gave "binary_crossentropy" too. But facing issues.
Parentfolder/train/categorie1
Parentfolder/train/categorie2
Parentfolder/test/categorie1
Parentfolder/test/categorie2
from keras.applications.inception_v3 import InceptionV3
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as K
# create the base pre-trained model
base_model = InceptionV3(weights='imagenet', include_top=False)
# add a global spatial average pooling layer
x = base_model.output
x = GlobalAveragePooling2D()(x)
# let's add a fully-connected layer
x = Dense(1024, activation='relu')(x)
x = Dense(512, activation='relu')(x)
x = Dense(32, activation='relu')(x)
# and a logistic layer -- we have 2 classes
predictions = Dense(2, activation='softmax')(x)
# this is the model we will train
model = Model(inputs=base_model.input, outputs=predictions)
for layer in base_model.layers:
layer.trainable = False
# we chose to train the top 2 inception blocks, i.e. we will freeze
# the first 249 layers and unfreeze the rest:
for layer in model.layers[:249]:
layer.trainable = False
for layer in model.layers[249:]:
layer.trainable = True
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
from keras.preprocessing.image import ImageDataGenerator
train_datagen = ImageDataGenerator(
rescale=1./255,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True)
test_datagen = ImageDataGenerator(rescale=1./255)
training_set = train_datagen.flow_from_directory(
'C:/Users/Desktop/Transfer/train/',
target_size=(64, 64),
batch_size=5,
class_mode='binary')
test_set = test_datagen.flow_from_directory(
'C:/Users/Desktop/Transfer/test/',
target_size=(64, 64),
batch_size=5,
class_mode='binary')
model.fit_generator(
training_set,
steps_per_epoch=1000,
epochs=10,
validation_data=test_set,
validation_steps=100)
replace this line
predictions = Dense(2, activation='softmax')(x)
with:
predictions = Dense(1, activation='sigmoid')(x)
Or encode your target into columns, f.e. [1,0,1] is the same as [[0,1],[1,0],[0,1]]
I use pretrained embedding vectors for my keras model. Before I did it everything worked and now I get this error:
ValueError: Error when checking input: expected embedding_1_input to
have shape (4,) but got array with shape (1,)
Maybe somebody can help me, what I do wrong here. I am not sure if I did correct model.fit and model.evaluate. Maybe there is a problem?
import csv
import numpy as np
np.random.seed(42)
from keras.models import Sequential, Model
from keras.layers import *
from random import shuffle
from sklearn.model_selection import train_test_split
from keras import optimizers
from keras.callbacks import EarlyStopping
from itertools import groupby
from numpy import asarray
from numpy import zeros
from numpy import array
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
#function makes a list of antonyms and synonyms from the files
def preprocessing(filename):
list_words = []
with open(filename) as tsv:
for line in csv.reader(tsv, dialect="excel-tab"):
list_words.append([line[0], line[1]])
return list_words
#function make a list of not relevant pairs by mixing synonyms and
antonyms
def notrelevant(filename, filename2):
list_words = []
with open(filename) as tsv:
with open(filename2) as tsv2:
for lines in zip(csv.reader(tsv, dialect="excel-tab"),csv.reader(tsv2, dialect="excel-tab")):
list_words.append([lines[0][0], lines[1][1]])
return list_words
antonyms_list = preprocessing("antonyms.tsv")
synonyms_list = preprocessing("synonyms.tsv")
notrelevant_list = notrelevant("antonyms.tsv", "synonyms.tsv")
# function combines all antonyms, synonyms in one list with labels,
shuffle them
def data_prepare(ant,syn,nrel):
data = []
for elem1,elem2 in ant:
data.append([[elem1,elem2], "Antonyms"])
for elem1, elem2 in syn:
data.append([[elem1, elem2], "Synonyms"])
for elem1, elem2 in nrel:
data.append([[elem1, elem2], "Not relevant"])
shuffle(data)
return data
data_with_labels_shuffled =
data_prepare(antonyms_list,synonyms_list,notrelevant_list)
def label_to_onehot(labels):
mapping = {label: i for i, label in enumerate(set(labels))}
one_hot = np.empty((len(labels), 3))
for i, label in enumerate(labels):
entry = [0] * len(mapping)
entry[mapping[label]] = 1
one_hot[i] = entry
return (one_hot)
def words_to_ids(labels):
vocabulary = []
word_to_id = {}
ids = []
for word1,word2 in labels:
vocabulary.append(word1)
vocabulary.append(word2)
counter = 0
for word in vocabulary:
if word not in word_to_id:
word_to_id[word] = counter
counter += 1
for word1,word2 in labels:
ids.append([word_to_id [word1], word_to_id [word2]])
return (ids)
def split_data(datas):
data = np.array(datas)
X, y = data[:, 0], data[:, 1]
# split the data to get 60% train and 40% test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)
y_train = label_to_onehot(y_train)
X_dev, X_test, y_dev, y_test = train_test_split(X_test, y_test, test_size=0.5, random_state=42)
y_dev = label_to_onehot(y_dev)
y_test = label_to_onehot(y_test)
return X_train, y_train, X_dev, y_dev, X_test, y_test
X_train, y_train, X_dev, y_dev, X_test, y_test = split_data(data_with_labels_shuffled)
# prepare tokenizer
t = Tokenizer()
t.fit_on_texts(X_train)
vocab_size = len(t.word_index) + 1
# integer encode the documents
encoded_docs = t.texts_to_sequences(X_train)
# load the whole embedding into memory
embeddings_index = dict()
f = open('glove.6B.50d.txt')
for line in f:
values = line.split()
word = values[0]
coefs = asarray(values[1:], dtype='float32')
embeddings_index[word] = coefs
f.close()
# create a weight matrix for words in training docs
embedding_matrix = zeros((vocab_size, 50))
for word, i in t.word_index.items():
embedding_vector = embeddings_index.get(word)
if embedding_vector is not None:
embedding_matrix[i] = embedding_vector
VOCABSIZE = len(data_with_labels_shuffled)
EMBSIZE = 50
HIDDENSIZE = 50
KERNELSIZE = 5
MAXEPOCHS = 5
model = Sequential()
model.add(Embedding(vocab_size, 50, weights=[embedding_matrix],
input_length=4, trainable=False))
model.add(Dropout(0.25))
model.add(Bidirectional(GRU(units = HIDDENSIZE // 2)))
#model.add(Flatten())
model.add(Dense(units = 3, activation = "softmax"))
model.compile(loss='categorical_crossentropy', optimizer="adam",
metrics=['accuracy'])
earlystop = EarlyStopping(monitor='val_loss', min_delta=0, patience=2, verbose=0, mode='min')
model.fit (X_train, y_train,
batch_size=64,
callbacks = [earlystop],
epochs=100,
validation_data=(X_dev, y_dev),
verbose=1)
scores = model.evaluate(X_test, y_testbatch_size=64)
print("Accuracy is: %.2f%%" %(scores[1] * 100))
I think the problem is that you should pass encoded_docs to your model.fit() function instead of X_train since encoded_docs contains the tokenization of your training data and X_train still only contains a list of words. Moreover, you have to make sure that the input_length parameter of your Embedding layer matches the length of these tokenized training examples that you have created in encoded_docs.
I am using InceptionV3 after fine tuning it to my own data set for a multi class multi label classification problem I made the most important changes like softmax to sigmoid and I am using this loss function
model.compile(loss='binary_crossentropy',optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
but when I predict using the generated model I get a small values like this [ 2.74303748e-04 7.97736086e-03 2.44359515e-04 7.09630767e-05
5.43296163e-04 4.08404367e-03 3.28547925e-01 1.05091414e-04
1.80469989e-03 2.85170972e-03 1.44978316e-04 7.78235449e-03
1.72435939e-02 1.55413849e-02 3.82270187e-01 1.06311939e-03
2.70067930e-01 6.08937175e-04 7.47230020e-04 1.07850268e-04]
the source code is this :(can it be my validation set ?)
import keras
import os
import sys
import glob
import argparse
import matplotlib.pyplot as plt
from keras import __version__
from keras.applications.inception_v3 import InceptionV3,
preprocess_input
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
IM_WIDTH, IM_HEIGHT = 299, 299 #fixed size for InceptionV3
NB_EPOCHS = 3
BAT_SIZE = 32
FC_SIZE = 1024
NB_IV3_LAYERS_TO_FREEZE = 172
def get_nb_files(directory):
"""Get number of files by searching directory recursively"""
if not os.path.exists(directory):
return 0
cnt = 0
for r, dirs, files in os.walk(directory):
for dr in dirs:
cnt += len(glob.glob(os.path.join(r, dr + "/*")))
return cnt
def setup_to_transfer_learn(model, base_model):
"""Freeze all layers and compile the model"""
for layer in base_model.layers:
layer.trainable = False
#model.compile(optimizer='rmsprop', loss='categorical_crossentropy',
#metrics=['accuracy'])
model.compile(loss='binary_crossentropy',
optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
def add_new_last_layer(base_model, nb_classes):
"""Add last layer to the convnet
Args:
base_model: keras model excluding top
nb_classes: # of classes
Returns:
new keras model with last layer
"""
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(FC_SIZE, activation='relu')(x) #new FC layer, random init
predictions = Dense(nb_classes, activation='sigmoid')(x)
model = Model(input=base_model.input, output=predictions)
return model
def setup_to_finetune(model):
"""Freeze the bottom NB_IV3_LAYERS and retrain the remaining top
layers.
note: NB_IV3_LAYERS corresponds to the top 2 inception blocks in the
inceptionv3 arch
Args:
model: keras model
"""
for layer in model.layers[:NB_IV3_LAYERS_TO_FREEZE]:
layer.trainable = False
for layer in model.layers[NB_IV3_LAYERS_TO_FREEZE:]:
layer.trainable = True
model.compile(loss='binary_crossentropy',
optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
def train(args):
"""Use transfer learning and fine-tuning to train a network on a new
dataset"""
nb_train_samples = get_nb_files(args.train_dir)
nb_classes = len(glob.glob(args.train_dir + "/*"))
nb_val_samples = get_nb_files(args.val_dir)
nb_epoch = int(args.nb_epoch)
batch_size = int(args.batch_size)
# data prep
train_datagen = ImageDataGenerator(
preprocessing_function=preprocess_input,
rotation_range=30,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True
)
test_datagen = ImageDataGenerator(
preprocessing_function=preprocess_input,
rotation_range=30,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True
)
train_generator = train_datagen.flow_from_directory(
args.train_dir,
target_size=(IM_WIDTH, IM_HEIGHT),
batch_size=batch_size,
)
validation_generator = test_datagen.flow_from_directory(
args.val_dir,
target_size=(IM_WIDTH, IM_HEIGHT),
batch_size=batch_size,
)
# setup model
base_model = InceptionV3(weights='imagenet', include_top=False)
#include_top=False excludes final FC layer
model = add_new_last_layer(base_model, nb_classes)
# transfer learning
setup_to_transfer_learn(model, base_model)
history_tl = model.fit_generator(
train_generator,
nb_epoch=nb_epoch,
samples_per_epoch=nb_train_samples,
validation_data=validation_generator,
nb_val_samples=nb_val_samples,
class_weight='auto')
# fine-tuning
setup_to_finetune(model)
history_ft = model.fit_generator(
train_generator,
samples_per_epoch=nb_train_samples,
nb_epoch=nb_epoch,
validation_data=validation_generator,
nb_val_samples=nb_val_samples,
class_weight='auto')
model.save(args.output_model_file)
if args.plot:
plot_training(history_ft)
def plot_training(history):
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(len(acc))
plt.plot(epochs, acc, 'r.')
plt.plot(epochs, val_acc, 'r')
plt.title('Training and validation accuracy')
plt.figure()
plt.plot(epochs, loss, 'r.')
plt.plot(epochs, val_loss, 'r-')
plt.title('Training and validation loss')
plt.show()
if __name__=="__main__":
a = argparse.ArgumentParser()
a.add_argument("--train_dir")
a.add_argument("--val_dir")
a.add_argument("--nb_epoch", default=NB_EPOCHS)
a.add_argument("--batch_size", default=BAT_SIZE)
a.add_argument("--output_model_file", default="inceptionv3-ft.model")
a.add_argument("--plot", action="store_true")
args = a.parse_args()
if args.train_dir is None or args.val_dir is None:
a.print_help()
sys.exit(1)
if (not os.path.exists(args.train_dir)) or (not
os.path.exists(args.val_dir)):
print("directories do not exist")
sys.exit(1)
train(args)
You answer your own question. This is the reason.
I made the most important changes like softmax to sigmoid
Sigmoid function maps the input to the range [0, 1]. Hence the output values of the network are just the outputs of this function and are not class probabilities. Maximum value from the nodes will give you the neuron that has the maximum output and hence the present class.
When you say
but some values should be >0.5(the present classes ) and some others should be <0.5( the non present classes )
in the comments, you are referring to the probability of >0.5 of the present class. What you require for class probabilities is the softmax function.
Therefore replacing your last sigmoid layer with a softmax layer will get you what you think you should be observing.
What is an example of how to use a TensorFlow TFRecord with a Keras Model and tf.session.run() while keeping the dataset in tensors w/ queue runners?
Below is a snippet that works but it needs the following improvements:
Use the Model API
specify an Input()
Load a dataset from a TFRecord
Run through a dataset in parallel (such as with a queuerunner)
Here is the snippet, there are several TODO lines indicating what is needed:
from keras.models import Model
import tensorflow as tf
from keras import backend as K
from keras.layers import Dense, Input
from keras.objectives import categorical_crossentropy
from tensorflow.examples.tutorials.mnist import input_data
sess = tf.Session()
K.set_session(sess)
# Can this be done more efficiently than placeholders w/ TFRecords?
img = tf.placeholder(tf.float32, shape=(None, 784))
labels = tf.placeholder(tf.float32, shape=(None, 10))
# TODO: Use Input()
x = Dense(128, activation='relu')(img)
x = Dense(128, activation='relu')(x)
preds = Dense(10, activation='softmax')(x)
# TODO: Construct model = Model(input=inputs, output=preds)
loss = tf.reduce_mean(categorical_crossentropy(labels, preds))
# TODO: handle TFRecord data, is it the same?
mnist_data = input_data.read_data_sets('MNIST_data', one_hot=True)
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
sess.run(tf.global_variables_initializer())
# TODO remove default, add queuerunner
with sess.as_default():
for i in range(1000):
batch = mnist_data.train.next_batch(50)
train_step.run(feed_dict={img: batch[0],
labels: batch[1]})
print(loss.eval(feed_dict={img: mnist_data.test.images,
labels: mnist_data.test.labels}))
Why is this question relevant?
For high performance training without going back to python
no TFRecord to numpy to tensor conversions
Keras will soon be part of tensorflow
Demonstrate how Keras Model() classes can accept tensors for input data correctly.
Here is some starter information for a semantic segmentation problem example:
example unet Keras model unet.py, happens to be for semantic segmentation.
Keras + Tensorflow Blog Post
An attempt at running the unet model a tf session with TFRecords and a Keras model (not working)
Code to create the TFRecords: tf_records.py
An attempt at running the unet model a tf session with TFRecords and a Keras model is in densenet_fcn.py (not working)
I don't use tfrecord dataset format so won't argue on the pros and cons, but I got interested in extending Keras to support the same.
github.com/indraforyou/keras_tfrecord is the repository. Will briefly explain the main changes.
Dataset creation and loading
data_to_tfrecord and read_and_decode here takes care of creating tfrecord dataset and loading the same. Special care must be to implement the read_and_decode otherwise you will face cryptic errors during training.
Initialization and Keras model
Now both tf.train.shuffle_batch and Keras Input layer returns tensor. But the one returned by tf.train.shuffle_batch don't have metadata needed by Keras internally. As it turns out, any tensor can be easily turned into a tensor with keras metadata by calling Input layer with tensor param.
So this takes care of initialization:
x_train_, y_train_ = ktfr.read_and_decode('train.mnist.tfrecord', one_hot=True, n_class=nb_classes, is_train=True)
x_train_batch, y_train_batch = K.tf.train.shuffle_batch([x_train_, y_train_],
batch_size=batch_size,
capacity=2000,
min_after_dequeue=1000,
num_threads=32) # set the number of threads here
x_train_inp = Input(tensor=x_train_batch)
Now with x_train_inp any keras model can be developed.
Training (simple)
Lets say train_out is the output tensor of your keras model. You can easily write a custom training loop on the lines of:
loss = tf.reduce_mean(categorical_crossentropy(y_train_batch, train_out))
train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
# sess.run(tf.global_variables_initializer())
sess.run(tf.initialize_all_variables())
with sess.as_default():
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
try:
step = 0
while not coord.should_stop():
start_time = time.time()
_, loss_value = sess.run([train_op, loss], feed_dict={K.learning_phase(): 0})
duration = time.time() - start_time
if step % 100 == 0:
print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value,
duration))
step += 1
except tf.errors.OutOfRangeError:
print('Done training for %d epochs, %d steps.' % (FLAGS.num_epochs, step))
finally:
coord.request_stop()
coord.join(threads)
sess.close()
Training (keras style)
One of the features of keras that makes it so lucrative is its generalized training mechanism with the callback functions.
But to support tfrecords type training there are several changes that are need in the fit function
running the queue threads
no feeding in batch data through feed_dict
supporting validation becomes tricky as the validation data will also be coming in through another tensor an different model needs to be internally created with shared upper layers and validation tensor fed in by other tfrecord reader.
But all this can be easily supported by another flag parameter. What makes things messing are the keras features sample_weight and class_weight they are used to weigh each sample and weigh each class. For this in compile() keras creates placeholders (here) and placeholders are also implicitly created for the targets (here) which is not needed in our case the labels are already fed in by tfrecord readers. These placeholders needs to be fed in during session run which is unnecessary in our cae.
So taking into account these changes, compile_tfrecord(here) and fit_tfrecord(here) are the extension of compile and fit and shares say 95% of the code.
They can be used in the following way:
import keras_tfrecord as ktfr
train_model = Model(input=x_train_inp, output=train_out)
ktfr.compile_tfrecord(train_model, optimizer='rmsprop', loss='categorical_crossentropy', out_tensor_lst=[y_train_batch], metrics=['accuracy'])
train_model.summary()
ktfr.fit_tfrecord(train_model, X_train.shape[0], batch_size, nb_epoch=3)
train_model.save_weights('saved_wt.h5')
You are welcome to improve on the code and pull requests.
Update 2018-08-29 this is now directly supported in keras, see the following example:
https://github.com/keras-team/keras/blob/master/examples/mnist_tfrecord.py
Original Answer:
TFRecords are supported by using an external loss. Here are the key lines constructing an external loss:
# tf yield ops that supply dataset images and labels
x_train_batch, y_train_batch = read_and_decode_recordinput(...)
# create a basic cnn
x_train_input = Input(tensor=x_train_batch)
x_train_out = cnn_layers(x_train_input)
model = Model(inputs=x_train_input, outputs=x_train_out)
loss = keras.losses.categorical_crossentropy(y_train_batch, x_train_out)
model.add_loss(loss)
model.compile(optimizer='rmsprop', loss=None)
Here is an example for Keras 2. It works after applying the small patch #7060:
'''MNIST dataset with TensorFlow TFRecords.
Gets to 99.25% test accuracy after 12 epochs
(there is still a lot of margin for parameter tuning).
'''
import os
import copy
import time
import numpy as np
import tensorflow as tf
from tensorflow.python.ops import data_flow_ops
from keras import backend as K
from keras.models import Model
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers import Input
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.callbacks import EarlyStopping
from keras.callbacks import TensorBoard
from keras.objectives import categorical_crossentropy
from keras.utils import np_utils
from keras.utils.generic_utils import Progbar
from keras import callbacks as cbks
from keras import optimizers, objectives
from keras import metrics as metrics_module
from keras.datasets import mnist
if K.backend() != 'tensorflow':
raise RuntimeError('This example can only run with the '
'TensorFlow backend for the time being, '
'because it requires TFRecords, which '
'are not supported on other platforms.')
def images_to_tfrecord(images, labels, filename):
def _int64_feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
def _bytes_feature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
""" Save data into TFRecord """
if not os.path.isfile(filename):
num_examples = images.shape[0]
rows = images.shape[1]
cols = images.shape[2]
depth = images.shape[3]
print('Writing', filename)
writer = tf.python_io.TFRecordWriter(filename)
for index in range(num_examples):
image_raw = images[index].tostring()
example = tf.train.Example(features=tf.train.Features(feature={
'height': _int64_feature(rows),
'width': _int64_feature(cols),
'depth': _int64_feature(depth),
'label': _int64_feature(int(labels[index])),
'image_raw': _bytes_feature(image_raw)}))
writer.write(example.SerializeToString())
writer.close()
else:
print('tfrecord %s already exists' % filename)
def read_and_decode_recordinput(tf_glob, one_hot=True, classes=None, is_train=None,
batch_shape=[1000, 28, 28, 1], parallelism=1):
""" Return tensor to read from TFRecord """
print 'Creating graph for loading %s TFRecords...' % tf_glob
with tf.variable_scope("TFRecords"):
record_input = data_flow_ops.RecordInput(
tf_glob, batch_size=batch_shape[0], parallelism=parallelism)
records_op = record_input.get_yield_op()
records_op = tf.split(records_op, batch_shape[0], 0)
records_op = [tf.reshape(record, []) for record in records_op]
progbar = Progbar(len(records_op))
images = []
labels = []
for i, serialized_example in enumerate(records_op):
progbar.update(i)
with tf.variable_scope("parse_images", reuse=True):
features = tf.parse_single_example(
serialized_example,
features={
'label': tf.FixedLenFeature([], tf.int64),
'image_raw': tf.FixedLenFeature([], tf.string),
})
img = tf.decode_raw(features['image_raw'], tf.uint8)
img.set_shape(batch_shape[1] * batch_shape[2])
img = tf.reshape(img, [1] + batch_shape[1:])
img = tf.cast(img, tf.float32) * (1. / 255) - 0.5
label = tf.cast(features['label'], tf.int32)
if one_hot and classes:
label = tf.one_hot(label, classes)
images.append(img)
labels.append(label)
images = tf.parallel_stack(images, 0)
labels = tf.parallel_stack(labels, 0)
images = tf.cast(images, tf.float32)
images = tf.reshape(images, shape=batch_shape)
# StagingArea will store tensors
# across multiple steps to
# speed up execution
images_shape = images.get_shape()
labels_shape = labels.get_shape()
copy_stage = data_flow_ops.StagingArea(
[tf.float32, tf.float32],
shapes=[images_shape, labels_shape])
copy_stage_op = copy_stage.put(
[images, labels])
staged_images, staged_labels = copy_stage.get()
return images, labels
def save_mnist_as_tfrecord():
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]
images_to_tfrecord(images=X_train, labels=y_train, filename='train.mnist.tfrecord')
images_to_tfrecord(images=X_test, labels=y_test, filename='test.mnist.tfrecord')
def cnn_layers(x_train_input):
x = Conv2D(32, (3, 3), activation='relu', padding='valid')(x_train_input)
x = Conv2D(64, (3, 3), activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Dropout(0.25)(x)
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.5)(x)
x_train_out = Dense(classes,
activation='softmax',
name='x_train_out')(x)
return x_train_out
sess = tf.Session()
K.set_session(sess)
save_mnist_as_tfrecord()
batch_size = 100
batch_shape = [batch_size, 28, 28, 1]
epochs = 3000
classes = 10
parallelism = 10
x_train_batch, y_train_batch = read_and_decode_recordinput(
'train.mnist.tfrecord',
one_hot=True,
classes=classes,
is_train=True,
batch_shape=batch_shape,
parallelism=parallelism)
x_test_batch, y_test_batch = read_and_decode_recordinput(
'test.mnist.tfrecord',
one_hot=True,
classes=classes,
is_train=True,
batch_shape=batch_shape,
parallelism=parallelism)
x_batch_shape = x_train_batch.get_shape().as_list()
y_batch_shape = y_train_batch.get_shape().as_list()
x_train_input = Input(tensor=x_train_batch, batch_shape=x_batch_shape)
x_train_out = cnn_layers(x_train_input)
y_train_in_out = Input(tensor=y_train_batch, batch_shape=y_batch_shape, name='y_labels')
cce = categorical_crossentropy(y_train_batch, x_train_out)
train_model = Model(inputs=[x_train_input], outputs=[x_train_out])
train_model.add_loss(cce)
train_model.compile(optimizer='rmsprop',
loss=None,
metrics=['accuracy'])
train_model.summary()
tensorboard = TensorBoard()
# tensorboard disabled due to Keras bug
train_model.fit(batch_size=batch_size,
epochs=epochs) # callbacks=[tensorboard])
train_model.save_weights('saved_wt.h5')
K.clear_session()
# Second Session, pure Keras
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]
x_test_inp = Input(batch_shape=(None,) + (X_test.shape[1:]))
test_out = cnn_layers(x_test_inp)
test_model = Model(inputs=x_test_inp, outputs=test_out)
test_model.load_weights('saved_wt.h5')
test_model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
test_model.summary()
loss, acc = test_model.evaluate(X_test, np_utils.to_categorical(y_test), classes)
print('\nTest accuracy: {0}'.format(acc))
I've also been working to improve the support for TFRecords in the following issue and pull request:
#6928 Yield Op support: High Performance Large Datasets via TFRecords, and RecordInput
#7102 Keras Input Tensor API Design Proposal
Finally, it is possible to use tf.contrib.learn.Experiment to train Keras models in TensorFlow.