Random Forest always predicts the same class

I have been trying to build a Random Forest model for sign language classification. The dataset is balanced and the accuracy is 98%, but the model always predicts the same class.
This is how I get and process the data:
import os
import cv2
import numpy as np
import skimage.transform
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

train_dir = "../input/asl-alphabet/asl_alphabet_train/asl_alphabet_train/"
test_dir = "../input/asl-alphabet/asl_alphabet_test/asl_alphabet_test/"
train_len = 87000
image_size = 50

# A-Z map to 0-25, then the three special folders; anything else falls back to 29
label_map = {chr(ord('A') + i): i for i in range(26)}
label_map.update({'del': 26, 'nothing': 27, 'space': 28})

def get_data(folder):
    X = np.empty((train_len, image_size, image_size, 3), dtype=np.float32)
    y = np.empty((train_len,), dtype=int)
    cnt = 0
    for folderName in os.listdir(folder):
        if folderName.startswith('.'):
            continue
        label = label_map.get(folderName, 29)
        for image_filename in tqdm(os.listdir(folder + folderName)):
            img_file = cv2.imread(folder + folderName + '/' + image_filename)
            if img_file is not None:
                img_file = skimage.transform.resize(img_file, (image_size, image_size, 3))
                X[cnt] = np.asarray(img_file)
                y[cnt] = label
                cnt += 1
    return X, y

letters, labels = get_data(train_dir)
print("The shape of letters is:", letters.shape)

# Preprocessing
X_train, X_test, y_train, y_test = train_test_split(
    letters, labels, test_size=0.3, random_state=42, stratify=labels)
X_valid, X_train = X_train[:20000] / 255., X_train[20000:] / 255.
y_valid, y_train = y_train[:20000], y_train[20000:]
X_test = X_test / 255.

# Flatten the images to a 2D array, since model.predict() also expects 2D input
nsamples, nx, ny, nrgb = X_train.shape
x_train2 = X_train.reshape((nsamples, nx * ny * nrgb))
nsamples, nx, ny, nrgb = X_test.shape
x_test2 = X_test.reshape((nsamples, nx * ny * nrgb))
This is how I made the model:
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(random_state=42, n_jobs=-1, max_depth=20,
                               n_estimators=100, oob_score=True)
model.fit(x_train2, y_train)
y_pred = model.predict(x_test2)
accuracy_score(y_test, y_pred)
# classification_report expects (y_true, y_pred) in that order
print(classification_report(y_test, y_pred))
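Since oob_score=True is set, the fitted model also carries an out-of-bag accuracy estimate, which makes a quick sanity check against the held-out accuracy. A small addition (not in the original post):
# OOB accuracy is estimated on training samples each tree never saw;
# a large gap between it and the test accuracy hints that something
# differs between the two evaluation paths.
print("OOB accuracy:", model.oob_score_)
print("Test accuracy:", accuracy_score(y_test, y_pred))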
To test the predictions I use:
label_names = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N',
               'O','P','Q','R','S','T','U','V','W','X','Y','Z','del','nothing','space']
img_path = '../input/asl-alphabet/asl_alphabet_test/asl_alphabet_test/L_test.jpg'
img_arr = cv2.imread(img_path)
img_arr = cv2.resize(img_arr, (64, 64))
# model.predict() also expects a 2D input
nx, ny, nrgb = img_arr.shape
img_arr2 = img_arr.reshape(1, nx * ny * nrgb)
ans = model.predict(img_arr2)
print(label_names[ans[0]])
I have tried to implement grid search, but I wasn't able to make it work (I am writing this on Kaggle and it says "Your notebook tried to allocate more memory than is available").
I am quite new to this, so I am not completely certain about anything in my code.
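On the grid-search memory error: a randomized search tries only a handful of candidates and fits them one at a time, so peak memory stays close to a single RandomForestClassifier fit. A minimal sketch, assuming the x_train2/y_train arrays from above (the parameter grid here is illustrative):
from sklearn.model_selection import RandomizedSearchCV

param_dist = {
    "n_estimators": [50, 100, 200],
    "max_depth": [10, 20, None],
    "max_features": ["sqrt", "log2"],
}
search = RandomizedSearchCV(
    RandomForestClassifier(random_state=42),
    param_distributions=param_dist,
    n_iter=5,   # 5 sampled candidates instead of the full grid
    cv=3,
    n_jobs=1,   # parallel fits multiply memory use
    random_state=42,
)
search.fit(x_train2, y_train)
print(search.best_params_)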

You are missing the preprocessing of the images between training and testing. You need to apply the same steps to the test image so your model gets the same kind of input it was trained on. Try the following:
label_names = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N',
               'O','P','Q','R','S','T','U','V','W','X','Y','Z','del','nothing','space']
img_path = '../input/asl-alphabet/asl_alphabet_test/asl_alphabet_test/L_test.jpg'
img_arr = cv2.imread(img_path)
# Preprocess the test image exactly like the training images:
# same 50x50 resize, same /255 scaling
img_arr = skimage.transform.resize(img_arr, (50, 50, 3))
img_arr = img_arr / 255.
# model.predict() expects a 2D input
nx, ny, nrgb = img_arr.shape
img_arr2 = img_arr.reshape(1, nx * ny * nrgb)
ans = model.predict(img_arr2)
print(label_names[ans[0]])
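More generally, this class of bug disappears when one helper owns the preprocessing and is called on both the training and the prediction path. A minimal sketch (the helper name is illustrative, not from the answer):
def preprocess_image(img_bgr, image_size=50):
    # Identical resize and scaling for every image, at train and predict time
    img = skimage.transform.resize(img_bgr, (image_size, image_size, 3))
    return img / 255.

# Prediction then becomes:
# img = preprocess_image(cv2.imread(img_path))
# print(label_names[model.predict(img.reshape(1, -1))[0]])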

Related

UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fd42c66d3b0>

I am trying to do an object detection problem and have been working with the aquarium dataset from Roboflow. I have been trying to create bounding boxes for the fishes, but I keep getting the error:
UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fd42c66d3b0>
I also tried to find out which images are corrupted by running this code:
import PIL
from pathlib import Path
from PIL import UnidentifiedImageError

count = 0
path = Path("/content/drive/MyDrive/archive/Aquarium Combined").rglob("*.jpg")
for img_p in path:
    try:
        img = PIL.Image.open(img_p)
    except PIL.UnidentifiedImageError:
        print(img_p)
        count += 1
print(count)
It gave me a count of 651 images, but my dataset has 662 images, so I guess PIL doesn't know how to decode them, or I don't know what the problem is. Here is a sample image file name:
/content/drive/MyDrive/archive/Aquarium Combined/test/IMG_2301_jpeg_jpg.rf.2c19ae5efbd1f8611b5578125f001695.jpg
Full traceback:
UnidentifiedImageError                    Traceback (most recent call last)
<ipython-input-31-2785d562a97e> in <module>()
      4     sample[1]['boxes'][:, [1, 0, 3, 2]],
      5     [classes[i] for i in sample[1]['labels']],
----> 6     width=4).permute(1, 2, 0)
      7 )

3 frames

/usr/local/lib/python3.7/dist-packages/PIL/Image.py in open(fp, mode)
   2894     if mode == "P":
   2895         from . import ImagePalette
-> 2896
   2897     im.palette = ImagePalette.ImagePalette("RGB", im.im.getpalette("RGB"))
   2898     im.readonly = 1

UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fd42c66d3b0>
Also, I am providing the class definition:
class AquariumDetection(datasets.VisionDataset):
    def __init__(
        self,
        root: str,
        split="train",
        transform=None,
        target_transform=None,
        transforms=None,
    ) -> None:
        super().__init__(root, transforms, transform, target_transform)
        self.split = split
        self.coco = COCO(os.path.join(root, split, "_annotations.coco.json"))
        self.ids = list(sorted(self.coco.imgs.keys()))
        self.ids = [id for id in self.ids if (len(self._load_target(id)) > 0)]

    def _load_image(self, id: int) -> Image.Image:
        path = self.coco.loadImgs(id)[0]["file_name"]
        image = cv2.imread(os.path.join(self.root, self.split, path))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        return image

    def _load_target(self, id: int):
        return self.coco.loadAnns(self.coco.getAnnIds(id))

    def __getitem__(self, index: int):
        id = self.ids[index]
        image = self._load_image(id)
        target = copy.deepcopy(self._load_target(id))
        boxes = [t['bbox'] + [t['category_id']] for t in target]
        if self.transforms is not None:
            transformed = self.transforms(image=image, bboxes=boxes)
            image = transformed['image']
            boxes = transformed['bboxes']
        new_boxes = []
        for box in boxes:
            xmin = box[0]
            ymin = box[1]
            xmax = xmin + box[2]
            ymax = ymin + box[3]
            new_boxes.append([ymin, xmin, ymax, xmax])
        boxes = torch.tensor(new_boxes, dtype=torch.float32)

        _, h, w = image.shape
        targ = {}
        targ["boxes"] = boxes
        targ["labels"] = torch.tensor([t["category_id"] for t in target], dtype=torch.int64)
        targ["image_id"] = torch.tensor([t["image_id"] for t in target])
        targ["area"] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        targ["iscrowd"] = torch.tensor([t["iscrowd"] for t in target], dtype=torch.int64)
        targ["img_scale"] = torch.tensor([1.0])
        targ['img_size'] = (h, w)

        image = image.div(255)
        normalize = T.Compose([T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
        return normalize(image), targ, index

    def __len__(self) -> int:
        return len(self.ids)
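A detail worth checking with the corruption count above: PIL.Image.open() is lazy and only reads the file header, so an image truncated mid-stream can pass that loop and still blow up later when the pixel data is decoded. A stricter sketch that forces a full decode (the load() call is the addition; the path is the one from the question):
import PIL
from pathlib import Path

bad = 0
for img_p in Path("/content/drive/MyDrive/archive/Aquarium Combined").rglob("*.jpg"):
    try:
        with PIL.Image.open(img_p) as img:
            img.load()  # force a full decode; open() alone reads only the header
    except (PIL.UnidentifiedImageError, OSError) as e:
        print(img_p, e)
        bad += 1
print(bad, "unreadable files")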

ggplotly tooltip is showing data twice

I have 2 datasets included in one chart using ggplot. I am using ggplotly to create tooltips, but the information in the tooltips for the 2 points from the second dataset is showing twice. The following code is a little lengthy but will recreate the chart:
AreaName <- c("A", "B", "C", "A", "B", "C")
Timeperiod <- c("2018", "2018", "2018", "2019", "2019", "2019")
Value <- c(11.5, 39.3, 9.4, 14.2, 40.7, 19.1)
df <- data.frame(cbind(AreaName, Timeperiod, Value), stringsAsFactors = F)
df$Value <- as.numeric(df$Value)

AreaName <- c("A", "A")
Timeperiod <- c("2019", "2020")
qtr <- c("Q1-Q2", "Q1-Q2")
Value <- c(15.6, 10.2)
df2 <- data.frame(cbind(Timeperiod, qtr, AreaName, Value), stringsAsFactors = F)
df2$Value <- as.numeric(df2$Value)

ggp <- ggplotly(ggplot(data = df, aes(x = Timeperiod, y = Value, group = AreaName, colour = AreaName,
                                      text = paste("Area name: ", AreaName,
                                                   "<br>Time period: ", Timeperiod,
                                                   "<br>Rate: ", round(Value, 1), "per 100,000"))) +
  geom_line() +
  geom_point() +
  geom_point(data = df2, aes(shape = c(paste(AreaName, qtr, Timeperiod)),
                             text = paste("Area name: ", AreaName,
                                          "<br>Quarter: ", qtr,
                                          "<br>Time period: ", Timeperiod,
                                          "<br>Rate: ", round(Value, 1), "per 100,000"))) +
  scale_shape_manual(values = c(18, 17)) +
  theme(axis.text.x = element_text(vjust = 0.5), axis.title.x = element_blank()) +
  labs(y = "Crude rate per 100,000 persons all ages", colour = "Area", shape = "") +
  guides(shape = guide_legend(order = 2), colour = guide_legend(order = 1)) +
  expand_limits(y = 0), tooltip = "text")

ggpNames <- unique(df$AreaName)
legs <- paste(df2$AreaName, df2$qtr, df2$Timeperiod)
ggpNames <- c(ggpNames, legs)

# Go over all places where legend values are stored
for (i in 1:length(ggp$x$data)) {
  n1 <- ggp$x$data[[i]]$name  # this is how the value is stored in plotly
  n2 <- " "
  for (j in 1:length(ggpNames)) {
    # If the plotly legend name contains the original value, replace it with the original value
    if (grepl(x = n1, pattern = ggpNames[j])) {n2 = ggpNames[j]}
  }
  ggp$x$data[[i]]$name <- n2  # now is the time for the actual replacement
  # Sometimes plotly adds legend values we don't want; this hides them
  if (n2 == " ") {ggp$x$data[[i]]$showlegend = FALSE}
}
ggp %>% config(displaylogo = FALSE,
               modeBarButtonsToRemove = list("autoScale2d", "resetScale2d", "select2d", "lasso2d",
                                             "zoomIn2d", "zoomOut2d", "toggleSpikelines",
                                             "zoom2d", "pan2d"))
ggp
Does anyone have an elegant solution to this?
Thanks
Do not define text in geom_point for the second data frame df2. Then you will get only one tooltip for those two points.
ggp <- ggplotly(ggplot(data = df, aes(x = Timeperiod, y = Value, group = AreaName, colour = AreaName,
                                      text = paste("Area name: ", AreaName,
                                                   "<br>Time period: ", Timeperiod,
                                                   "<br>Rate: ", round(Value, 1), "per 100,000"))) +
  geom_line() +
  geom_point() +
  geom_point(data = df2, aes(shape = c(paste(AreaName, qtr, Timeperiod))  # ,
             # text = paste("Area name: ", AreaName, "<br>Quarter: ", qtr,
             #              "<br>Time period: ", Timeperiod, "<br>Rate: ", round(Value, 1), "per 100,000")
  )) +
  scale_shape_manual(values = c(18, 17)) +
  theme(axis.text.x = element_text(vjust = 0.5), axis.title.x = element_blank()) +
  labs(y = "Crude rate per 100,000 persons all ages", colour = "Area", shape = "") +
  guides(shape = guide_legend(order = 2), colour = guide_legend(order = 1)) +
  expand_limits(y = 0), tooltip = "text")

ggpNames <- unique(df$AreaName)
legs <- paste(df2$AreaName, df2$qtr, df2$Timeperiod)
ggpNames <- c(ggpNames, legs)

# Go over all places where legend values are stored
for (i in 1:length(ggp$x$data)) {
  n1 <- ggp$x$data[[i]]$name  # this is how the value is stored in plotly
  n2 <- " "
  for (j in 1:length(ggpNames)) {
    # If the plotly legend name contains the original value, replace it with the original value
    if (grepl(x = n1, pattern = ggpNames[j])) {n2 = ggpNames[j]}
  }
  ggp$x$data[[i]]$name <- n2  # now is the time for the actual replacement
  # Sometimes plotly adds legend values we don't want; this hides them
  if (n2 == " ") {ggp$x$data[[i]]$showlegend = FALSE}
}
ggp %>% config(displaylogo = FALSE,
               modeBarButtonsToRemove = list("autoScale2d", "resetScale2d", "select2d", "lasso2d",
                                             "zoomIn2d", "zoomOut2d", "toggleSpikelines",
                                             "zoom2d", "pan2d"))
ggp

Creating flowchart with DiagrammeR nodes and edges instead of graphviz

I would like to create a flowchart using the DiagrammeR nodes-and-edges functionality in R instead of using the graphviz wrapper function.
However, I can't figure out how to make the edges straight so the chart looks clean.
This is the graphviz solution that looks like what I want:
# Packages needed for the test
library(DiagrammeR)

# grViz solution (note: DiagrammeR's label substitution uses @@n with [n]: footnotes)
grViz("digraph flowchart {
  # node definitions with substituted label text
  node [fontname = Helvetica, shape = rectangle]
  tab1 [label = '@@1', group=gr1]
  tab2 [label = '@@2', group=gr2]
  tab3 [label = '@@3', group=gr3]
  invis1 [style = invis, shape=point, width = 0, group=gr1]
  invis1a [style = invis, shape=point, width = 0, group=gr2]
  invis1b [style = invis, shape=point, width = 0, group=gr3]

  # edge definitions with the node IDs
  edge [arrowhead='none']
  tab1 -> invis1;
  invis1a -> invis1 -> invis1b; {rank=same invis1a invis1 invis1b}
  edge [arrowhead='normal']
  invis1a -> tab2;
  invis1b -> tab3; {rank=same tab2 tab3}}

[1]: 'A'
[2]: 'B'
[3]: 'C'
")
This is my attempt to recreate the same graph with the nodes and edges solution:
# Packages needed for the test
library(DiagrammeR)
library(magrittr)

# Node and edge df solution
create_graph() %>%
  add_node(  # id 1
    label = "A",
    type = "group_1",
    node_aes = node_aes(
      style = "filled",
      shape = "rectangle",
      fixedsize = FALSE)
  ) %>%
  add_node(  # id 2
    type = "group_1",
    node_aes = node_aes(
      style = "invisible",
      height = 0,
      width = 0)
  ) %>%
  add_edge(
    from = 1,
    to = 2,
    edge_aes = edge_aes(
      arrowhead = "none")
  ) %>%
  add_node(  # id 3
    type = "group_2",
    node_aes = node_aes(
      style = "invisible",
      height = 0,
      width = 0)
  ) %>%
  add_edge(
    from = 2,
    to = 3,
    edge_aes = edge_aes(
      arrowhead = "none")
  ) %>%
  add_node(  # id 4
    type = "group_3",
    node_aes = node_aes(
      style = "invisible",
      height = 0,
      width = 0)
  ) %>%
  add_edge(
    from = 2,
    to = 4,
    edge_aes = edge_aes(
      arrowhead = "none")
  ) %>%
  add_node(  # id 5
    label = "B",
    type = "group_2",
    node_aes = node_aes(
      style = "filled",
      shape = "rectangle",
      fixedsize = FALSE)
  ) %>%
  add_edge(
    from = 3,
    to = 5,
    edge_aes = edge_aes(
      arrowhead = "normal")
  ) %>%
  add_node(  # id 6
    label = "C",
    type = "group_3",
    node_aes = node_aes(
      style = "filled",
      shape = "rectangle",
      fixedsize = FALSE)
  ) %>%
  add_edge(
    from = 4,
    to = 6,
    edge_aes = edge_aes(
      arrowhead = "normal")
  ) %>%
  render_graph()

How to use multiple GPUs effectively when training deep networks?

I am using a machine with 2 Titan Black GPUs to train my deep learning model, which has 3 convolutional layers (3x3, 3x3 and 5x5 kernels).
The training runs fine, but when I watch nvidia-smi (every 1 sec) I see that my program uses only one GPU for computation; the second one sits at 0% even when the first one reaches 100%.
I tried using tf.device to assign specific tasks to each of them, but then they run one after another, not in parallel, and the total time even increased rather than decreased (I guess because the 2 GPUs had to exchange values with each other).
Below is my program. It is quite messy; maybe you just need to look at the part of the graph where I use tf.device...
Thank you so much!
import tensorflow as tf
import numpy as np
from six.moves import cPickle as pickle
import matplotlib.pyplot as plt
from os import listdir, sys
from os.path import isfile, join
from time import gmtime, strftime
import time

def validatePath(path):
    path = path.replace("\\", "/")
    if (path[len(path) - 1] != "/"):
        path = path + "/"
    return path

hidden_size_default = np.array([16, 32, 64, 32])
cnn1_default = 3
cnn2_default = 3
cnn3_default = 5
SIZE_BATCH_VALID = 200
input_path = 'ARCHIVES-sub-dataset'
output_path = 'ARCHIVES-model'
log_address = "trainlog.txt"

tf.app.flags.DEFINE_integer('h0', hidden_size_default[0], 'Size of hidden layer 0th')
tf.app.flags.DEFINE_integer('h1', hidden_size_default[1], 'Size of hidden layer 1st')
tf.app.flags.DEFINE_integer('h2', hidden_size_default[2], 'Size of hidden layer 2nd')
tf.app.flags.DEFINE_integer('h3', hidden_size_default[3], 'Size of hidden layer 3rd')
tf.app.flags.DEFINE_integer('k1', cnn1_default, 'Size of kernel 1st')
tf.app.flags.DEFINE_integer('k2', cnn2_default, 'Size of kernel 2nd')
tf.app.flags.DEFINE_integer('k3', cnn3_default, 'Size of kernel 3rd')
tf.app.flags.DEFINE_string('input_path', input_path, 'The parent directory which contains 2 directories: dataset and label')
tf.app.flags.DEFINE_string('output_path', output_path, 'The directory which will store models (you have to create)')
tf.app.flags.DEFINE_string('log_address', log_address, 'The file name which will store the log')
FLAGS = tf.app.flags.FLAGS

load_path = FLAGS.input_path
save_model_path = FLAGS.output_path
log_addr = FLAGS.log_address
load_path = validatePath(load_path)
save_model_path = validatePath(save_model_path)
cnn1 = FLAGS.k1
cnn2 = FLAGS.k2
cnn3 = FLAGS.k3
hidden_size = np.array([FLAGS.h0, FLAGS.h1, FLAGS.h2, FLAGS.h3])

# Shuffle the dataset and its labels
def randomize(dataset, labels):
    permutation = np.random.permutation(labels.shape[0])
    shuffled_dataset = dataset[permutation, :]
    shuffled_labels = labels[permutation]
    return shuffled_dataset, shuffled_labels

def writemyfile(mystring):
    with open(log_addr, "a") as myfile:
        myfile.write(str(mystring + "\n"))

num_labels = 5

def accuracy(predictions, labels):
    return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1)) / predictions.shape[0])

def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

def DivideSets(input_set):
    length_set = input_set.shape[0]
    index_70 = int(length_set * 0.7)
    index_90 = int(length_set * 0.9)
    set_train = input_set[0:index_70]
    set_valid = input_set[index_70:index_90]
    set_test = input_set[index_90:length_set]
    return np.float32(set_train), np.float32(set_valid), np.float32(set_test)

# From 1-value labels to 5 values of (0 and 1)
def LabelReconstruct(label_set):
    label_set = label_set.astype(int)
    new_label_set = np.zeros(shape=(len(label_set), num_labels))
    for i in range(len(label_set)):
        new_label_set[i][label_set[i]] = 1
    return new_label_set.astype(int)

def LoadDataSet(load_path):
    list_data = [f for f in listdir(load_path + "dataset/") if isfile(join(load_path + "dataset/", f))]
    list_label = [f for f in listdir(load_path + "label/") if isfile(join(load_path + "dataset/", f))]
    if list_data.sort() == list_label.sort():
        return list_data
    else:
        print("data and labels are not suitable")
        return 0

# Load, randomize, normalize images and reconstruct labels
def PrepareData(*arg):
    filename = arg[0]
    loaded_dataset = pickle.load(open(load_path + "dataset/" + filename, "rb"))
    loaded_labels = pickle.load(open(load_path + "label/" + filename, "rb"))
    if len(arg) == 1:
        datasize = len(loaded_labels)
    elif len(arg) == 2:
        datasize = int(arg[1])
    else:
        print("not more than 2 arguments please!")
    dataset_full, labels_full = randomize(loaded_dataset[0:datasize], loaded_labels[0:datasize])
    return NormalizeData(dataset_full), LabelReconstruct(labels_full)

def NormalizeData(dataset):
    dataset = dataset - (dataset.mean())
    dataset = dataset / (dataset.std())
    return dataset

### LOAD DATA
listfiles = LoadDataSet(load_path)
# divide
listfiles_train = listfiles[0:15]
listfiles_valid = listfiles[15:25]
listfiles_test = listfiles[25:len(listfiles)]

graphCNN = tf.Graph()
with graphCNN.as_default():
    with tf.device('/gpu:0'):
        x = tf.placeholder(tf.float32, shape=(None, 224, 224, 3))  # X
        y_ = tf.placeholder(tf.float32, shape=(None, num_labels))  # Y_
        dropout = tf.placeholder(tf.float32)
        # NOTE: this comparison runs in Python at graph-construction time, so for a
        # placeholder it is always False and keep_prob stays at [1.0, 1.0, 1.0]
        if dropout == 1.0:
            keep_prob = tf.constant([0.2, 0.3, 0.5], dtype=tf.float32)
        else:
            keep_prob = tf.constant([1.0, 1.0, 1.0], dtype=tf.float32)

        weights_1 = weight_variable([cnn1, cnn1, 3, hidden_size[0]])
        biases_1 = bias_variable([hidden_size[0]])
        weights_2 = weight_variable([cnn2, cnn2, hidden_size[0], hidden_size[1]])
        biases_2 = bias_variable([hidden_size[1]])
        weights_3 = weight_variable([cnn3, cnn3, hidden_size[1], hidden_size[2]])
        biases_3 = bias_variable([hidden_size[2]])
        weights_4 = weight_variable([56 * 56 * hidden_size[2], hidden_size[3]])
        biases_4 = bias_variable([hidden_size[3]])
        weights_5 = weight_variable([hidden_size[3], num_labels])
        biases_5 = bias_variable([num_labels])

        def model(data):
            with tf.device('/gpu:1'):
                train_hidden_1 = tf.nn.relu(conv2d(data, weights_1) + biases_1)
                train_hidden_2 = max_pool_2x2(tf.nn.relu(conv2d(train_hidden_1, weights_2) + biases_2))
                train_hidden_2_drop = tf.nn.dropout(train_hidden_2, keep_prob[0])
                train_hidden_3 = max_pool_2x2(tf.nn.relu(conv2d(train_hidden_2_drop, weights_3) + biases_3))
                train_hidden_3_drop = tf.nn.dropout(train_hidden_3, keep_prob[1])
                train_hidden_3_drop = tf.reshape(train_hidden_3_drop, [-1, 56 * 56 * hidden_size[2]])
                train_hidden_4 = tf.nn.relu(tf.matmul(train_hidden_3_drop, weights_4) + biases_4)
                train_hidden_4_drop = tf.nn.dropout(train_hidden_4, keep_prob[2])
                logits = tf.matmul(train_hidden_4_drop, weights_5) + biases_5
                return logits

        t_train_labels = tf.argmax(y_, 1)  # from one-hot vectors to class indices
        loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=model(x), labels=t_train_labels))
        optimizer = tf.train.AdamOptimizer(0.01).minimize(loss)
        y = tf.nn.softmax(model(x))

### RUNNING
print("log address: %s" % (log_addr))
#num_steps = 10001
times_repeat = 20  # number of epochs
batch_size = 100
with tf.Session(graph=graphCNN, config=tf.ConfigProto(log_device_placement=True)) as session:
    tf.initialize_all_variables().run()
    saver = tf.train.Saver(max_to_keep=0)
    writemyfile("---ARCHIVES_M1----")
    mytime = strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    writemyfile(str("\nTime: %s \nLayers: %d,%d,%d \nepochs: %d" % (mytime, cnn1, cnn2, cnn3, times_repeat)))
    writemyfile("Train files:" + str(listfiles_train))
    writemyfile("Valid files:" + str(listfiles_valid))
    writemyfile("Test files:" + str(listfiles_test))
    print("Model will be saved in file: %s" % save_model_path)
    writemyfile(str("Model will be saved in file: %s" % save_model_path))

    ### TRAINING & VALIDATION
    valid_accuracies_epochs = np.array([])
    for time_repeat in range(times_repeat):
        print("- time_repeat:", time_repeat)
        writemyfile("- time_repeat:" + str(time_repeat))
        for file_train in listfiles_train:
            file_train_id = int(file_train[0:len(file_train) - 4])
            time_start_this_file = time.time()
            # LOAD DATA
            print("- - file:", file_train_id, end=' ')
            writemyfile("- - file:" + str(file_train_id))
            Data_train, Label_train = PrepareData(file_train)
            for step in range(0, len(Data_train) - batch_size, batch_size):
                batch_data = Data_train[step:step + batch_size]
                batch_labels = Label_train[step:step + batch_size]
                feed_dict = {x: batch_data, y_: batch_labels, dropout: 1.0}
                opti, l, predictions = session.run([optimizer, loss, y], feed_dict=feed_dict)
            train_accuracies = np.array([])
            for index_tr_accu in range(0, len(Data_train) - SIZE_BATCH_VALID, SIZE_BATCH_VALID):
                current_predictions = y.eval(feed_dict={x: Data_train[index_tr_accu:index_tr_accu + SIZE_BATCH_VALID], dropout: 0.0})
                current_accuracy = accuracy(current_predictions, Label_train[index_tr_accu:index_tr_accu + SIZE_BATCH_VALID])
                train_accuracies = np.r_[train_accuracies, current_accuracy]
            train_accuracy = train_accuracies.mean()
            print("batch accu: %.2f%%" % (train_accuracy), end=" | ")
            writemyfile("batch accu: %.2f%%" % (train_accuracy))
            time_done_this_file = time.time() - time_start_this_file
            print("time: %.2fs" % (time_done_this_file))
            writemyfile("time: %.2fs" % (time_done_this_file))
        # save model
        model_addr = save_model_path + "model335" + "-epoch-" + str(time_repeat) + ".ckpt"
        save_path = saver.save(session, model_addr)  # max_to_keep default was 5
        mytime = strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        print("epoch finished at %s \n model address: %s" % (mytime, model_addr))
        writemyfile("epoch finished at %s \n model address: %s" % (mytime, model_addr))
        # validation
        valid_accuracies = np.array([])
        for file_valid in listfiles_valid:
            file_valid_id = int(file_valid[0:len(file_valid) - 4])
            Data_valid, Label_valid = PrepareData(file_valid)
            for index_vl_accu in range(0, len(Data_valid) - SIZE_BATCH_VALID, SIZE_BATCH_VALID):
                current_predictions = y.eval(feed_dict={x: Data_valid[index_vl_accu:index_vl_accu + SIZE_BATCH_VALID], dropout: 0.0})
                current_accuracy = accuracy(current_predictions, Label_valid[index_vl_accu:index_vl_accu + SIZE_BATCH_VALID])
                valid_accuracies = np.r_[valid_accuracies, current_accuracy]
        valid_accuracy = valid_accuracies.mean()
        print("epoch %d - valid accu: %.2f%%" % (time_repeat, valid_accuracy))
        writemyfile("epoch %d - valid accu: %.2f%%" % (time_repeat, valid_accuracy))
        valid_accuracies_epochs = np.hstack([valid_accuracies_epochs, valid_accuracy])
    print('Done!!')
    writemyfile(str('Done!!'))
    session.close()
Update: I found that cifar10_multi_gpu_train.py seems to be a good example for training with multiple GPUs, but honestly I don't know how to apply it to my case.
I think you need to change
def model(data):
    with tf.device('/gpu:1'):
to:
def model(data):
    for d in ['/gpu:0', '/gpu:1']:
        with tf.device(d):
and ditch the line with tf.device('/gpu:0'):. At the first with tf.device you are only doing initialization of variables, and then you are resetting your devices with the next with tf.device.
Let me know if this works, since I can't test it.
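For reference, cifar10_multi_gpu_train.py parallelises over data rather than splitting one graph across devices: each GPU builds its own copy of the model (a "tower") on its own slice of the batch, the weights are shared between towers, and the averaged gradients produce one update. A condensed sketch of that pattern under TF 1.x; model_loss is a hypothetical helper that builds the network for one slice and returns its scalar loss:
optimizer = tf.train.AdamOptimizer(0.01)
tower_grads = []
batch_per_gpu = batch_size // 2
for i, d in enumerate(['/gpu:0', '/gpu:1']):
    with tf.device(d):
        # Each tower processes its own slice of the batch...
        x_slice = x[i * batch_per_gpu:(i + 1) * batch_per_gpu]
        y_slice = y_[i * batch_per_gpu:(i + 1) * batch_per_gpu]
        # ...but reuses the same variables as the first tower.
        with tf.variable_scope('model', reuse=(i > 0)):
            loss_i = model_loss(x_slice, y_slice)
        tower_grads.append(optimizer.compute_gradients(loss_i))

# Average per-variable gradients across towers and apply them once.
with tf.device('/cpu:0'):
    avg_grads = []
    for grad_vars in zip(*tower_grads):
        grads = tf.stack([g for g, _ in grad_vars])
        avg_grads.append((tf.reduce_mean(grads, axis=0), grad_vars[0][1]))
    train_op = optimizer.apply_gradients(avg_grads)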

Language Modelling with RNN and LSTM Cell in Tensorflow

My RNN for language modelling predicts only "the", "and" and "unknown". What is wrong with my code?
Here I define the hyperparameters:
num_epochs = 300
total_series_length = len(uniqueSentence) - 4
truncated_backprop_length = 30
state_size = 100
num_classes = NUM_MEANINGFUL + 1
echo_step = 1
batch_size = 32
vocab_length = len(decoder)
num_batches = total_series_length//batch_size//truncated_backprop_length
learning_rate = 0.01
old_perplexity = 0
Here I generate the data (my inputs are 100-dimensional word embeddings computed with Word2Vec):
def generateData():
    uniqueSent = uniqueSentence[0 : len(uniqueSentence) - 4]
    x_tr = np.array([model_ted[word] for word in uniqueSent])
    # Roll array elements along a given axis.
    # Elements that roll beyond the last position are re-introduced at the first.
    x_tr = x_tr.reshape((100, batch_size, -1))  # first index changes slowest, subseries as rows
    x = x_tr.transpose((1, 2, 0))
    print("hi")
    new_y = indexList[1 : len(indexList) - 4]
    new_y.append(indexList[len(indexList) - 3])
    y = np.array(new_y)
    print(len(y))
    y = y.reshape((batch_size, -1))
    return (x, y)
Define the placeholders:
batchX_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length, 100])
batchY_placeholder = tf.placeholder(tf.int32, [batch_size, truncated_backprop_length])
W = tf.Variable(np.random.rand(state_size, num_classes),dtype=tf.float32)
b = tf.Variable(np.zeros((batch_size, num_classes)), dtype=tf.float32)
W2 = tf.Variable(np.random.rand(state_size, num_classes),dtype=tf.float32)
b2 = tf.Variable(np.zeros((batch_size, num_classes)), dtype=tf.float32)
Inputs and desired outputs:
labels_series = tf.transpose(batchY_placeholder)
labels_series = tf.unstack(batchY_placeholder, axis=1)
inputs_series = batchX_placeholder
Forward pass:
from tensorflow.contrib.rnn.python.ops import core_rnn_cell_impl
print(tf.__version__)
#cell = tf.contrib.rnn.BasicRNNCell(state_size)
cell = tf.contrib.rnn.BasicLSTMCell(state_size, state_is_tuple = False)
print(cell.state_size)
init_state = tf.zeros([batch_size, cell.state_size])
outputs, current_state = tf.nn.dynamic_rnn(cell, inputs_series, initial_state = init_state)
iterable_outputs = tf.unstack(outputs, axis = 1)
logits_series = [tf.matmul(state, W2) + b2 for state in iterable_outputs] #Broadcasted addition
predictions_series = [tf.nn.softmax(logits) for logits in logits_series]
losses = [tf.losses.sparse_softmax_cross_entropy(labels, logits)
          for logits, labels in zip(logits_series, labels_series)]
total_loss = tf.add_n(losses)
train_step = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)
x,y = generateData()
del(model_ted)
Training:
with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    sess.run(tf.initialize_all_variables())
    loss_list = []
    print("start")
    _current_state = np.zeros((batch_size, 2 * state_size))
    # I previously had generateData outside and _current_state inside
    for epoch_idx in range(num_epochs):
        print("New data, epoch", epoch_idx)
        for batch_idx in range(num_batches):
            start_idx = batch_idx * truncated_backprop_length
            end_idx = start_idx + truncated_backprop_length
            batchX = x[:, start_idx:end_idx, :]
            batchY = y[:, start_idx:end_idx]
            _total_loss, _train_step, _current_state, _predictions_series = sess.run(
                [total_loss, train_step, current_state, predictions_series],
                feed_dict={
                    batchX_placeholder: batchX,
                    batchY_placeholder: batchY,
                    init_state: _current_state
                })
            loss_list.append(_total_loss)
            del(batchX)
            del(batchY)
        perplexity = 2 ** (_total_loss / truncated_backprop_length)
        print(perplexity)
        del(perplexity)
        _predictions_series = np.array(_predictions_series)
        pr = _predictions_series.transpose([1, 0, 2])
        pr_ind = []
        for line in pr[0]:
            pr_ind.append(np.argmax(line))
        for index in pr_ind:
            print(decoder[index], end=" ")
        del(pr_ind)
        print("\n learning rate: ", end=" ")
        print(learning_rate)
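One side note on the perplexity printed above: tf.losses.sparse_softmax_cross_entropy uses the natural logarithm, so the perplexity of a mean per-step loss is exp(loss), not 2 ** loss. A one-line correction (it fixes the reported number only, not the repeated-word predictions):
mean_step_loss = _total_loss / truncated_backprop_length  # mean cross-entropy per step, in nats
perplexity = np.exp(mean_step_loss)  # natural-log loss, so exponentiate with e, not 2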
