Image normalization - machine-learning

In my perspective, image normalization is to make every pixel to be normalized with an value between 0 and 1, am I right?
But what does the following code mean?
image_size = 28 # Pixel width and height.
pixel_depth = 255.0 # Number of levels per pixel.
for image in image_files:
image_file = os.path.join(folder, image)
try:
image_data = (ndimage.imread(image_file).astype(float) -
pixel_depth / 2) / pixel_depth # WHY ??
if image_data.shape != (image_size, image_size):
raise Exception('Unexpected image shape: %s' % str(image_data.shape))
dataset[num_images, :, :] = image_data
num_images = num_images + 1
except IOError as e:
print('Could not read:', image_file, ':', e, '- it\'s ok, skipping.')

Image normalization is merely the process of changing the range of pixel intensity values.
The choice of the new range is up to you.
In the case you've shown, it looks like the range -0.5 .. 0.5 has been chosen.

Related

error: (-215:Assertion failed) !ssize.empty() in function 'cv::resize' OpenCV

I have this old code that is used to run fine in Python 2.7 a while ago. I just updated the code to run in Python 3.8, but when I try to execute it code in Python 3.8 and OpenCV 3.4 I get a resize error and a warning (below)!
Here is the link to the two tif images that are required to run this code.
It's worth noting that both tif images are in the same folder as the Python code
import cv2
import matplotlib.pyplot as plt
import numpy as np
## Code for C_preferred Mask and C_images##
## There are three outputs to this code:
#"Block_order_C.PNG"
#"Out_img.PNG"
#"Output_C.txt"
## Change the image name here
filename_image = '2.tif'
filename_mask = '1.tif'
## OpenCV verison Checking
#print 'OpenCV version used', cv2.__version__
filename = open("Output_C.txt","w")
filename.write("Processing Image : " + str(filename_image) + '\n\n')
## Function to sort the contours : Parameters that you can tune : tolerance_factor and size 0f the image.Here, I have used a fix size of
## (800,800)
def get_contour_precedence(contour, cols):
tolerance_factor = 10
origin = cv2.boundingRect(contour)
return ((origin[1] // tolerance_factor) * tolerance_factor) * cols + origin[0]
## Loading the colored mask, resizing it to (800,800) and converting it from RGB to HSV space, so that the color values are emphasized
p_mask_c = cv2.cvtColor(cv2.resize(cv2.imread(filename_mask),(800,800)),cv2.COLOR_RGB2HSV);
# Loading the original Image
b_image_1 = cv2.resize(cv2.imread(filename_image),(800,800));
cv2.imshow("c_mask_preferred",p_mask_c)
cv2.waitKey();
# convert the target color to HSV, As our target mask portion to be considered is green. So I have chosen target color to be green
b = 0;
g = 255;
r = 0;
# Converting target color to HSV space
target_color = np.uint8([[[b, g, r]]])
target_color_hsv = cv2.cvtColor(target_color, cv2.COLOR_BGR2HSV)
# boundaries for Hue define the proper color boundaries, saturation and values can vary a lot
target_color_h = target_color_hsv[0,0,0]
tolerance = 20
lower_hsv = np.array([max(0, target_color_h - tolerance), 10, 10])
upper_hsv = np.array([min(179, target_color_h + tolerance), 250, 250])
# apply threshold on hsv image
mask = cv2.inRange(p_mask_c, lower_hsv, upper_hsv)
cv2.imshow("mask",mask)
cv2.waitKey()
# Eroding the binary mask, such that every white portion (grids) are seperated from each other, to avoid overlapping and mixing of
# adjacent grids
b_mask = mask;
kernel = np.ones((5,5))
#kernel = cv2.getStructuringElement(cv2.MORPH_CROSS,(3,3))
sharp = cv2.erode(b_mask,kernel, iterations=2)
# Finding all the grids (from binary image)
contours, hierarchy = cv2.findContours(sharp,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
print (' Number of contours', len(contours))
# Sorting contours
contours.sort(key=lambda x:get_contour_precedence(x, np.shape(b_mask)[0]))
#cv2.drawContours(b_image_1, contours, -1, (0,255,0), 1)
# Label variable for each grid/panel
label = 1;
b_image = b_image_1.copy();
temp =np.zeros(np.shape(b_image_1),np.uint8)
print (' size of temp',np.shape(temp), np.shape(b_image))
out_img = b_image_1.copy()
# Processing in each contour/label one by one
for cnt in contours:
cv2.drawContours(b_image_1,[cnt],0,(255,255,0), 1)
## Just to draw labels in the center of each grid
((x, y), r) = cv2.minEnclosingCircle(cnt)
x = int(x)
y = int(y)
r = int(r)
cv2.putText(b_image_1, "#{}".format(label), (int(x) - 10, int(y)),cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
##
cv2.drawContours(temp,[cnt],0,(255,255,255), -1)
#crop_img = np.bitwise_and(b_image,temp)
r = cv2.boundingRect(cnt)
crop_img = b_image[r[1]:r[1]+r[3], r[0]:r[0]+r[2]]
mean = cv2.mean(crop_img);
mean = np.array(mean).reshape(-1,1)
print (' Mean color', mean, np.shape(mean))
if mean[1] < 50:
cv2.putText(out_img, "M", (int(x) - 10, int(y)),cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 1)
filename.write("Block number #"+ str(label)+ ' is : ' + 'Magenta'+'\n');
else:
cv2.putText(out_img, "G", (int(x) - 10, int(y)),cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 1)
filename.write("Block number #"+ str(label)+ ' is : ' +'Gray'+'\n');
label = label+1;
cv2.imwrite("Block_order_C.PNG",b_image_1)
cv2.imwrite("Out_img.PNG",out_img)
filename.close()
cv2.imshow("preferred",b_image_1)
cv2.waitKey()
Error
[ WARN:0] global C:\projects\opencv-python\opencv\modules\imgcodecs\src\grfmt_tiff.cpp (449) cv::TiffDecoder::readData OpenCV TIFF: TIFFRGBAImageOK: Sorry, can not handle images with IEEE floating-point samples
Traceback (most recent call last):
File "Processing_C_preferred.py", line 32, in
p_mask_c = cv2.cvtColor(cv2.resize(cv2.imread(filename_mask),(800,800)),cv2.COLOR_RGB2HSV);
cv2.error: OpenCV(4.2.0) C:\projects\opencv-python\opencv\modules\imgproc\src\resize.cpp:4045: error: (-215:Assertion failed) !ssize.empty() in function 'cv::resize'
When you read in the image pass the cv::IMREAD_ANYDEPTH = 2 parameter as the second parameter in cv2.imread().
Changing your lines to
p_mask_c = cv2.cvtColor(cv2.resize(cv2.imread(filename_mask, 2),(800,800)),cv2.COLOR_RGB2HSV);
and
b_image_1 = cv2.resize(cv2.imread(filename_image, 2),(800,800));
removes the resize error you're seeing.
But you get another error when changing the color since your TIFF image apparently has only one channel so cv2.COLOR_RGB2HSV won't work..
You could also use multiple flags like cv::IMREAD_COLOR = 1,
p_mask_c = cv2.cvtColor(cv2.resize(cv2.imread(filename_mask, 2 | 1),(800,800)),cv2.COLOR_BGR2HSV);
to read in a color image. But you get a different error. Perhaps you understand this image better than I do and can solve the problem from here on out.

Putting my pictures over black ground not working OpenCV

So this is what I have now:
As you can see, the neural style transfer thing is only going over the area the detection box is detecting. I am trying to put the transformed cool picture (which will always be less than 1200 x 900 because the detection box is 1200 x 900) in a black picture with dimensions 1200 x 900 so that I can save the video file.
My box is measured with: startX, endX, startY, and endY. The way I am trying to put the cool picture over the background right now is: black_background[startY:endY, startX:endX] = output, where output also has the size (endY - startY, endX - startX).
My way is not working, any insights? And also, for some reason, when I do "*black_background[startY:endY, startX:endX] = output", there is often a few pixel off broadcasting issue, like can't add (859, 100, 3) with (860, 100, 3). Is there a non-buggy solution to the black background issue? I feel like manually doing *black_background[startY:endY, startX:endX] = output is weird.
Here's my full code, I marked the if loop that actually matters with -----, thank you!
from __future__ import print_function
from imutils.video import VideoStream
from imutils.video import FPS
import numpy as np
import argparse
import imutils
import time
import cv2
from imutils import paths
import itertools
# We need to input model prototxt
ap = argparse.ArgumentParser()
ap.add_argument("-p", "--prototxt", required=True,
help="path to Caffe 'deploy' prototxt file")
ap.add_argument("-m", "--model", required=True,
help="path to Caffe pre-trained model")
ap.add_argument("-c", "--confidence", type=float, default=0.2,
help="minimum probability to filter weak detections")
ap.add_argument("-nm", "--neuralmodels", required=True,
help="path to directory containing neural style transfer models")
args = vars(ap.parse_args())
# we should identify the class first, and then transfer that block
CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat",
"bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
"dog", "horse", "motorbike", "person", "pottedplant", "sheep",
"sofa", "train", "tvmonitor"]
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))
# load our serialized model from disk
print("[INFO] loading model...")
DetectionNet = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"])
# grab the paths to all neural style transfer models in our 'models'
# directory, provided all models end with the '.t7' file extension
modelPaths = paths.list_files(args["neuralmodels"], validExts=(".t7",))
modelPaths = sorted(list(modelPaths))
# generate unique IDs for each of the model paths, then combine the
# two lists together
models = list(zip(range(0, len(modelPaths)), (modelPaths)))
# use the cycle function of itertools that can loop over all model
# paths, and then when the end is reached, restart again
modelIter = itertools.cycle(models)
(modelID, modelPath) = next(modelIter)
NTSnet = cv2.dnn.readNetFromTorch(modelPath)
# initialize the video stream, allow the cammera sensor to warmup,
# and initialize the FPS counter
print("[INFO] starting video stream...")
vs = VideoStream(src=1).start()
fps = FPS().start()
fourcc = cv2.VideoWriter_fourcc(*'XVID')
output_video = cv2.VideoWriter('output.avi', fourcc, 20.0, (1200, 900))
while True:
# grab the frame from the threaded video stream and resize it
# to have a maximum width of 400 pixels
frame = vs.read()
frame = imutils.resize(frame, width=1200, height=900)
# grab the frame dimensions and convert it to a blob
(h, w) = frame.shape[:2]
blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)),
0.007843, (300, 300), 127.5)
# pass the blob through the network and obtain the detections and
# predictions
DetectionNet.setInput(blob)
detections = DetectionNet.forward()
# loop over the detections
for i in np.arange(0, detections.shape[2]):
# extract the confidence (i.e., probability) associated with
# the prediction
confidence = detections[0, 0, i, 2]
# filter out weak detections by ensuring the `confidence` is
# greater than the minimum confidence
if confidence > args["confidence"]:
# extract the index of the class label from the
# `detections`, then compute the (x, y)-coordinates of
# the bounding box for the object
idx = int(detections[0, 0, i, 1])
if(CLASSES[idx] == "person" and confidence > .90):
box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
(startX, startY, endX, endY) = box.astype("int")
# draw the prediction on the frame
label = "{}: {:.2f}%".format("PERSON",
confidence * 100)
cv2.rectangle(frame, (startX, startY), (endX, endY),
COLORS[idx], 2)
y = startY - 15 if startY - 15 > 15 else startY + 15
cv2.putText(frame, label, (startX, y),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2)
# print box area in background
newimage = frame[startY:endY, startX:endX]
(h, w) = newimage.shape[:2]
#print(h,w)
#print(startX, endX, startY, endY)
noise_picture = cv2.imread('white_noise.jpg')
black_background = cv2.imread('black.png')
-------------------------------------------------------------------
*if(h > 0 and w > 0):
# to_be_transformed is the detection box area
# resize that area for MobileNetSSD
#to_be_transformed = imutils.resize(to_be_transformed, height=450)
(height_orig, width_orig) = noise_picture.shape[:2]
noise_picture[startY:endY, startX:endX] = newimage
noise_picture = imutils.resize(noise_picture, height=450)
# run it through the network, output is the image
(h, w) = noise_picture.shape[:2]
# print(h, w)
blob2 = cv2.dnn.blobFromImage(noise_picture, 1.0, (w, h), (103.939, 116.779, 123.680), swapRB=False, crop=False)
NTSnet.setInput(blob2)
output = NTSnet.forward()
output = output.reshape((3, output.shape[2], output.shape[3]))
output[0] += 103.939
output[1] += 116.779
output[2] += 123.680
output /= 255.0
output = output.transpose(1, 2, 0)
# set the 600 x 450 back to the original size
black_background = imutils.resize(black_background, width=1200, height = 900)
output = imutils.resize(output, width=1200)
#black_background[startY:endY, startX:endX] = output[startY:endY, startX:endX]
output = output[startY:endY, startX:endX]
(h2, w2) = output.shape[:2]
if(h2>0 and w2>0 ):
cv2.imshow('hmm', output)
black_background[startY:endY, startX:endX] = output
cv2.imshow("uh", black_background)
#output_video.write(black_background)
#output_video.write(background)*
---------------------------------------------------------------
# show the output frame, which is the whole thing
cv2.imshow("Frame", frame)
key = cv2.waitKey(1) & 0xFF
# if the `q` key was pressed, break from the loop
if key == ord("q"):
break
# update the FPS counter
fps.update()
# stop the timer and display FPS information
fps.stop()
print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))
# do a bit of cleanup
cv2.destroyAllWindows()
vs.stop()
Oh man, second time I made this mistake. You have to do * 255 when you are adding your output picture to your background. This is really weird, it seems like imread works if you only put numbers in [0, 1], but once you have a value that goes over 1, it treats the range as [0, 255], don't take my words on it though.

imshow seems to show the wrong image (not duplicate)

I am trying to show image using cv2.imshow(), but batch_data (original image) got altered after I concatenate it with data (set of all image). I am note sure how original image is altered.
data = np.array([]).reshape([0, IMG_WIDTH , IMG_HEIGHT ,IMG_DEPTH ])
label = np.array([])
batch_label = np.array([255]).reshape(1) #label number represent background
x = True
for (n,address) in enumerate(address_list):
print("Reading all images with background from ", address)
batch_data = cv2.imread(image_dir + address)
dim = (IMG_WIDTH ,IMG_HEIGHT)
if batch_data is not None:
batch_data = cv2.resize(batch_data,dim, interpolation = cv2.INTER_NEAREST)
else:
print("batch_data is not read.")
batch_data = np.expand_dims(batch_data, axis= 0)
data = np.concatenate((data,batch_data))
label = np.concatenate((label,batch_label))
while x:
print("batch_data.shape",batch_data.shape)
print("data.shape", data.shape)
print((np.squeeze(batch_data, axis=0) == data[n,...]).all()) # return true
cv2.imshow('image', np.squeeze(batch_data, axis= 0)) # show original image
cv2.imshow('image2', data[n,...]) #show original image but color is alter to white and red
cv2.waitKey(0)
cv2.destroyAllWindows()
x = False
I think cv2.imshow('image2', data[n,...]) show the original image because I tried using transpose to swap axis=1 to axis=2, and the red spot is moved accordingly. I may be wrong.
Can anyone spot the mistake? I feel like it is going to be a very stupid mistake, but I just could not find it.
I think this is a data type problem.
Try to change data from float64 to uint8:
data = np.array([], dtype=np.uint8).reshape([0, IMG_WIDTH , IMG_HEIGHT ,IMG_DEPTH])
The white and red are signs that indicate saturation. float64 range is expected to be [0, 1], while uint8 is expected to be [0, 255]. You can find more about this problem here.

How to split the image into chunks without breaking character - python

I am trying to read image from the text.
I am getting better result if I break the images into small chunks but the problem is when i try to split the image it is cutting/slicing my characters.
code I am using :
from __future__ import division
import math
import os
from PIL import Image
def long_slice(image_path, out_name, outdir, slice_size):
"""slice an image into parts slice_size tall"""
img = Image.open(image_path)
width, height = img.size
upper = 0
left = 0
slices = int(math.ceil(height/slice_size))
count = 1
for slice in range(slices):
#if we are at the end, set the lower bound to be the bottom of the image
if count == slices:
lower = height
else:
lower = int(count * slice_size)
#set the bounding box! The important bit
bbox = (left, upper, width, lower)
working_slice = img.crop(bbox)
upper += slice_size
#save the slice
working_slice.save(os.path.join(outdir, "slice_" + out_name + "_" + str(count)+".png"))
count +=1
if __name__ == '__main__':
#slice_size is the max height of the slices in pixels
long_slice("/python_project/screenshot.png","longcat", os.getcwd(), 100)
Sample Image : The image i want to process
Expected/What i am trying to do :
I want to split every line as separate image without cutting the character
Line 1:
Line 2:
Current result:Characters in the image are cropped
I dont want to cut the image based on pixels since each document will have separate spacing and line width
Thanks
Jk
Here is a solution that finds the brightest rows in the image (i.e., the rows without text) and then splits the image on those rows. So far I have just marked the sections, and am leaving the actual cropping up to you.
The algorithm is as follows:
Find the sum of the luminance (I am just using the red channel) of every pixel in each row
Find the rows with sums that are at least 0.999 (which is the threshold I am using) as bright as the brightest row
Mark those rows
Here is the code that will return a list of these rows:
def find_lightest_rows(img, threshold):
line_luminances = [0] * img.height
for y in range(img.height):
for x in range(img.width):
line_luminances[y] += img.getpixel((x, y))[0]
line_luminances = [x for x in enumerate(line_luminances)]
line_luminances.sort(key=lambda x: -x[1])
lightest_row_luminance = line_luminances[0][1]
lightest_rows = []
for row, lum in line_luminances:
if(lum > lightest_row_luminance * threshold):
lightest_rows.add(row)
return lightest_rows
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 ... ]
After colouring these rows red, we have this image:

Emgu CV - Anisotropic Diffusion

Can anybody guide me to some existing implementations of anisotropic diffusion, preferably the perona-malik diffusion?
translate the following MATLAB code :
% pm2.m - Anisotropic Diffusion routines
function ZN = pm2(ZN,K,iterate);
[m,n] = size(ZN);
% lambda = 0.250;
lambda = .025;
%K=16;
rowC = [1:m]; rowN = [1 1:m-1]; rowS = [2:m m];
colC = [1:n]; colE = [2:n n]; colW = [1 1:n-1];
result_save=0;
for i = 1:iterate,
%i;
% result=PSNR(Z,ZN);
% if result>result_save
% result_save=result;
% else
% break;
% end
deltaN = ZN(rowN,colC) - ZN(rowC,colC);
deltaS = ZN(rowS,colC) - ZN(rowC,colC);
deltaE = ZN(rowC,colE) - ZN(rowC,colC);
deltaW = ZN(rowC,colW) - ZN(rowC,colC);
% deltaN = deltaN .*abs(deltaN<K);
% deltaS = deltaS .*abs(deltaS<K);
% deltaE = deltaE .*abs(deltaE<K);
% deltaW = deltaW .*abs(deltaW<K);
fluxN = deltaN .* exp(-((abs(deltaN) ./ K).^2) );
fluxS = deltaS .* exp(-((abs(deltaS) ./ K).^2) );
fluxE = deltaE .* exp(-((abs(deltaE) ./ K).^2) );
fluxW = deltaW .* exp(-((abs(deltaW) ./ K).^2) );
ZN = ZN + lambda*(fluxN +fluxS + fluxE + fluxW);
%ZN=max(0,ZN);ZN=min(255,ZN);
end
the code is not mine and has been taken from: http://www.csee.wvu.edu/~xinl/code/pm2.m
OpenCV Implementation (It needs 3 channel image):
from cv2.ximgproc import anisotropicDiffusion
ultrasound_ad_cv2 = anisotropicDiffusion(im,0.075 ,80, 100)
Juxtapose comparison
From scratch in Python: (For grayscale image only)
import scipy.ndimage.filters as flt
import numpy as np
import warnings
def anisodiff(img,niter=1,kappa=50,gamma=0.1,step=(1.,1.),sigma=0, option=1,ploton=False):
"""
Anisotropic diffusion.
Usage:
imgout = anisodiff(im, niter, kappa, gamma, option)
Arguments:
img - input image
niter - number of iterations
kappa - conduction coefficient 20-100 ?
gamma - max value of .25 for stability
step - tuple, the distance between adjacent pixels in (y,x)
option - 1 Perona Malik diffusion equation No 1
2 Perona Malik diffusion equation No 2
ploton - if True, the image will be plotted on every iteration
Returns:
imgout - diffused image.
kappa controls conduction as a function of gradient. If kappa is low
small intensity gradients are able to block conduction and hence diffusion
across step edges. A large value reduces the influence of intensity
gradients on conduction.
gamma controls speed of diffusion (you usually want it at a maximum of
0.25)
step is used to scale the gradients in case the spacing between adjacent
pixels differs in the x and y axes
Diffusion equation 1 favours high contrast edges over low contrast ones.
Diffusion equation 2 favours wide regions over smaller ones.
"""
# ...you could always diffuse each color channel independently if you
# really want
if img.ndim == 3:
warnings.warn("Only grayscale images allowed, converting to 2D matrix")
img = img.mean(2)
# initialize output array
img = img.astype('float32')
imgout = img.copy()
# initialize some internal variables
deltaS = np.zeros_like(imgout)
deltaE = deltaS.copy()
NS = deltaS.copy()
EW = deltaS.copy()
gS = np.ones_like(imgout)
gE = gS.copy()
# create the plot figure, if requested
if ploton:
import pylab as pl
from time import sleep
fig = pl.figure(figsize=(20,5.5),num="Anisotropic diffusion")
ax1,ax2 = fig.add_subplot(1,2,1),fig.add_subplot(1,2,2)
ax1.imshow(img,interpolation='nearest')
ih = ax2.imshow(imgout,interpolation='nearest',animated=True)
ax1.set_title("Original image")
ax2.set_title("Iteration 0")
fig.canvas.draw()
for ii in np.arange(1,niter):
# calculate the diffs
deltaS[:-1,: ] = np.diff(imgout,axis=0)
deltaE[: ,:-1] = np.diff(imgout,axis=1)
if 0<sigma:
deltaSf=flt.gaussian_filter(deltaS,sigma);
deltaEf=flt.gaussian_filter(deltaE,sigma);
else:
deltaSf=deltaS;
deltaEf=deltaE;
# conduction gradients (only need to compute one per dim!)
if option == 1:
gS = np.exp(-(deltaSf/kappa)**2.)/step[0]
gE = np.exp(-(deltaEf/kappa)**2.)/step[1]
elif option == 2:
gS = 1./(1.+(deltaSf/kappa)**2.)/step[0]
gE = 1./(1.+(deltaEf/kappa)**2.)/step[1]
# update matrices
E = gE*deltaE
S = gS*deltaS
# subtract a copy that has been shifted 'North/West' by one
# pixel. don't as questions. just do it. trust me.
NS[:] = S
EW[:] = E
NS[1:,:] -= S[:-1,:]
EW[:,1:] -= E[:,:-1]
# update the image
imgout += gamma*(NS+EW)
if ploton:
iterstring = "Iteration %i" %(ii+1)
ih.set_data(imgout)
ax2.set_title(iterstring)
fig.canvas.draw()
# sleep(0.01)
return imgout
Usage
:
#anisodiff(img,niter=1,kappa=50,gamma=0.1,step=(1.,1.),sigma=0, option=1,ploton=False)
us_im_ad = anisodiff(ultrasound,100,80,0.075,(1,1),2.5,1)
Source
Juxtapose comparison

Resources