Pixels regions comparision - gimp

I'm trying to write a python script for GIMP, who's aim is to slice a picture into a tileset (identify each unique 16x16 tiles in a picture).
So far, I'm able to read tiles (in fact a 16x16 pixels region) and write it somewhere.
But all my attempts at comparing tiles failed.
Did I miss Something ?
My script is as follow:
#!/usr/bin/env python
from gimpfu import *
# compare 2 tiles,
# return 1 if identical, false otherwise
def tileCompare(tile1, tile2):
if(tile1 == tile2):
return 1
return 0
# return tile at (x, y) coordinates
def readTile(layer, x, y):
pr = layer.get_pixel_rgn(x,y,16,16)
return pr[x:x+16, y:y+16]
# write tile at (x, y) coordinates on given layer
def writeTile(layer, x, y, tile):
pr = layer.get_pixel_rgn(x,y,16,16)
pr[x:x+16, y:y+16] = tile
def TilesSlicer(sourceLayer, targetLayer):
# Actual plug-in code will go here
# iterate tiles (result in tileSource)
for x in range(0, sourceLayer.width, 16):
for y in range(0, sourceLayer.height, 16):
tileSource = readTile(sourceLayer, x, y)
found = 0
# iterate tiles again (result in tileIterator)
for a in range(0, sourceLayer.width, 16):
for b in range(0, sourceLayer.height, 16):
tileIterator = readTile(sourceLayer, x, y)
# compare tiles
# if identical and not yet found
# write it in the target layer
if (tileCompare(tileSource, tileIterator) == 1):
if(found == 0):
writeTile(tileIterator, a, b, tileSource)
found = 1
register(
"TilesSlicer",
"Tiles slicer",
"Slice a picture into tiles",
"Fabrice Lambert",
"Fabrice Lambert",
"April 2019",
"Tiles slicer...",
"RGB*",
[
(PF_DRAWABLE, "sourceLayer", "Source Layer: ", None),
(PF_DRAWABLE, "targetLayer", "Target Layer: ", None),
],
[],
TilesSlicer,
menu="<Image>/Filters/My Scripts")
main()
Thanks for your suggestions.

Nvm,
I found the problem:
tileIterator = readTile(sourceLayer, a, b)
instead of:
tileIterator = readTile(sourceLayer, x, y)

Alright,
After refining a bit, the script is as follow:
- Added tiles width and height to handle any tile size.
- Removed target layer parameter, the script now create it.
- Added real time display to give feedbacks to the user (sadly, progress bar doesn't work).
- Improved speed.
#!/usr/bin/env python
from gimpfu import *
# compare 2 tiles,
# return 1 if identical, 0 otherwise
def tileCompare(tile1, tile2):
if(tile1 == tile2):
return 1
return 0
# return tile at (x, y) coordinates
def readTile(layer, x, y, width, height):
pr = layer.get_pixel_rgn(x, y, width, height)
return pr[x:x+width, y:y+height]
# write tile at (x, y) coordinates on given layer
def writeTile(layer, x, y, width, height, tile):
pr = layer.get_pixel_rgn(x, y, width, height)
pr[x:x+width, y:y+height] = tile
layer.update(x, y, width, height)
gimp.displays_flush()
def TilesSlicer(sourceLayer, tileWidth, tileHeight):
# Actual plug-in code will go here
if((sourceLayer.width % tileWidth) != 0):
gimp.message("The layer width is not multiple of " + str(tileWidth))
gimp.quit()
if((sourceLayer.height % tileWidth) != 0):
gimp.message("The layer height is not multiple of " + str(tileHeight))
gimp.quit()
totalTiles = (sourceLayer.width / tileWidth) * (sourceLayer.height / tileHeight)
tilesProcessed = 0
gimp.progress_init("Processing...")
gimp.progress_update(0.0)
sourceImage = sourceLayer.image
targetLayer = pdb.gimp_layer_new(sourceImage, sourceLayer.width, sourceLayer.height, sourceImage.base_type, "Target", 100.0, sourceLayer.mode)
targetLayer.add_alpha()
targetLayer.fill(TRANSPARENT_FILL)
sourceImage.add_layer(targetLayer, 0)
# iterate tiles (result in tileSource)
for x in range(0, sourceLayer.width, tileWidth):
for y in range(0, sourceLayer.height, tileHeight):
tileSource = readTile(sourceLayer, x, y, tileWidth, tileHeight)
found = 0
# iterate tiles again (result in tileIterator)
for a in range(0, sourceLayer.width, tileWidth):
for b in range(0, sourceLayer.height, tileHeight):
tileIterator = readTile(sourceLayer, a, b, tileWidth, tileHeight)
# compare tiles
# if identical and not yet found
# write it in the target layer
# and abort iteration (for speed purpose)
if (tileCompare(tileSource, tileIterator) == 1):
if(found == 0):
writeTile(targetLayer, a, b, tileWidth, tileHeight, tileIterator)
found = 1
break
if(found == 1):
break
tilesProcessed = tilesProcessed + 1
gimp.progress_update(tilesProcessed / totalTiles)
gimp.displays_flush()
register(
"TilesSlicer",
"Tiles slicer",
"Slice a picture into tiles",
"Fabrice Lambert",
"Fabrice Lambert",
"April 2019",
"Tiles slicer...",
"RGB*",
[
(PF_DRAWABLE, "sourceLayer", "Source Layer: ", None),
(PF_INT8, "tileWidth", "Tile width: ", 16),
(PF_INT8, "tileHeight", "Tile height: ", 16),
],
[],
TilesSlicer,
menu="<Image>/Filters/My Scripts")
main()
It can probably be refined better, and if someone have anything to deal with the progress bar, let me know.
I'm open to suggestions.

Related

Why does Tesseract fail to recognize 6 out of 26 of my alphabetic keyboard keys even with several parameter tunings?

TL;DR I'm using:
adaptive thresholding
segmenting by keys (width/height ratio) - see green boxes in image result
psm 10 to treat each key as a character
but it fails to recognize some keys, falsely identifies others or identifies 2 for 1 char (see the L character in the image result, it's an L and P), etc.
Note: I cropped the image and re-ran the results to get it to fit on this site, but before cropping it did slightly better (recognized more keys, fewer false positives, etc).
I just want it to recognize the alphabet keys. Ultimately I will want it to work for realtime video.
config:
'-l eng --oem 1 --psm 10 -c tessedit_char_whitelist="ABCDEFGHIJKLMNOPQRSTUVWXYZ"'
I've tried scaling the image differently, scaling the individual key segments, using opening/closing/etc but it doesn't recognize all the keys.
original image
image result
Update: new results if I make the image straighter (bird's eye) and remove the whitelisting, it manages to detect all for the most part (although it thinks the O is a 0 and the I is a |, which is understandable). Why is this and how could I make this adaptive enough for a dynamic video when it is so sensitive to these conditions?
Code:
import pytesseract
import numpy as np
try:
from PIL import Image
except ImportError:
import Image
import cv2
from tqdm import tqdm
from collections import defaultdict
def get_missing_chars(dict):
capital_alphabet = [chr(ascii) for ascii in range(65, 91)]
return [let for let in capital_alphabet if let not in dict]
def draw_box_and_char(img, contour_dims, c, box_col, text_col):
x, y, w, h = contour_dims
top_left = (x, y)
bot_right = (x + w, y+h)
font_offset = 3
text_pos = (x+h//2+12, y+h-font_offset)
img_copy = img.copy()
cv2.rectangle(img_copy, top_left, bot_right, box_col, 2)
cv2.putText(img_copy, c, text_pos, cv2.FONT_HERSHEY_SIMPLEX, fontScale=.5, color=text_col, thickness=1, lineType=cv2.LINE_AA)
return img_copy
def detect_keys(img):
scaling = .25
img = cv2.resize(img, None, fx=scaling, fy=scaling, interpolation=cv2.INTER_AREA)
print("img shape", img.shape)
gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ratio_min = 0.7
area_min = 1000
nbrhood_size = 1001
bias = 2
# adapt to different lighting
bin_img = cv2.adaptiveThreshold(gray_img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,\
cv2.THRESH_BINARY_INV, nbrhood_size, bias)
items = cv2.findContours(bin_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = items[0] if len(items) == 2 else items[1]
key_contours = []
for c in contours:
x, y, w, h = cv2.boundingRect(c)
ratio = h/w
area = cv2.contourArea(c)
# square-like ratio, try to get character
if ratio > ratio_min and area > area_min:
key_contours.append(c)
detected = defaultdict(int)
n_kept = 0
img_copy = cv2.cvtColor(bin_img, cv2.COLOR_GRAY2RGB)
let_to_contour = {}
n_contours = len(key_contours)
# offset to get smaller square within the key segment for easier char recognition
offset = 10
show_each_char = False
for _, c in tqdm(enumerate(key_contours), total=n_contours):
x, y, w, h = cv2.boundingRect(c)
ratio = h/w
area = cv2.contourArea(c)
base = np.zeros(bin_img.shape, dtype=np.uint8)
base.fill(255)
n_kept += 1
new_y = y+offset
new_x = x+offset
new_h = h-2*offset
new_w = w-2*offset
base[new_y:new_y+new_h, new_x:new_x+new_w] = bin_img[new_y:new_y+new_h, new_x:new_x+new_w]
segment = cv2.bitwise_not(base)
# try scaling up individual keys
# scaling = 2
# segment = cv2.resize(segment, None, fx=scaling, fy=scaling, interpolation=cv2.INTER_CUBIC)
# psm 10: treats the segment as a single character
custom_config = r'-l eng --oem 1 --psm 10 -c tessedit_char_whitelist="ABCDEFGHIJKLMNOPQRSTUVWXYZ"'
d = pytesseract.image_to_data(segment, config=custom_config, output_type='dict')
conf = d['conf']
c = d['text'][-1]
if c:
# sometimes recognizes multiple keys even though there is only 1
for sub_c in c:
# save character and contour to draw on image and show bounds/detection
if sub_c not in let_to_contour or (sub_c in let_to_contour and conf > let_to_contour[sub_c]['conf']):
let_to_contour[sub_c] = {'conf': conf, 'cont': (new_x, new_y, new_w, new_h)}
else:
c = "?"
text_col = (0, 0, 255)
if show_each_char:
contour_dims = (new_x, new_y, new_w, new_h)
box_col = (0, 255, 0)
text_col = (0, 0, 0)
segment_with_boxes = draw_box_and_char(segment, contour_dims, c, box_col, text_col)
cv2.imshow('segment', segment_with_boxes)
cv2.waitKey(0)
cv2.destroyAllWindows()
# draw boxes around recognized keys
for c, data in let_to_contour.items():
box_col = (0, 255, 0)
text_col = (0, 0, 0)
img_copy = draw_box_and_char(img_copy, data['cont'], c, box_col, text_col)
detected = {k: 1 for k in let_to_contour}
for det in let_to_contour:
print(det, let_to_contour[det])
print("total detected: ", let_to_contour.keys())
missing = get_missing_chars(detected)
print(f"n_missing: {len(missing)}")
print(f"chars missing: {missing}")
return img_copy
if __name__ == "__main__":
img_file = "keyboard.jpg"
img = cv2.imread(img_file)
img_with_detected_keys = detect_keys(img)
cv2.imshow("detected", img_with_detected_keys)
cv2.waitKey(0)
cv2.destroyAllWindows()

I want to calculate the local ternary pattern of an image in python. Am I going in the right direction?

Definition of LTP
Here param is the value for k
def get_pixel(img, center, x, y):
new_value = 0
param = 10
try:
if img[x][y] >= center+param:
new_value = 1
elif img[x][y] <= center-param:
new_value = -1
except:
pass
return new_value
# Function for calculating ltp
def ltp_calculated_pixel(img, x, y):
center = img[x][y]
val_ar = []
# top_left
val_ar.append(get_pixel(img, center, x-1, y-1))
# top
val_ar.append(get_pixel(img, center, x-1, y))
# top_right
val_ar.append(get_pixel(img, center, x-1, y + 1))
# right
val_ar.append(get_pixel(img, center, x, y + 1))
# bottom_right
val_ar.append(get_pixel(img, center, x + 1, y + 1))
# bottom
val_ar.append(get_pixel(img, center, x + 1, y))
# bottom_left
val_ar.append(get_pixel(img, center, x + 1, y-1))
# left
val_ar.append(get_pixel(img, center, x, y-1))
# Now, we need to convert binary
# values to decimal
power_val = [1, 2, 4, 8, 16, 32, 64, 128]
val = 0
for i in range(len(val_ar)):
val += val_ar[i] * power_val[i]
return val
Description : The function get_pixel is the code for getting the center pixel value.And the function ltp_calculated_pixel is the function to get the local terary pattern.I am also adding definition of Local Ternary pattern. Also suggest me is there is any algorithm to find the optimal value for k(i.e "param" on this code)

Why is cv2.dnn work faster when I use CPU rather than GPU?

I am new to openCV - CUDA so I have been testing the most simple one which is loading a model on GPU rather than CPU to see how fast GPU is and I am horrified at the result I get.
----------------------------------------------------------------
--- GPU vs CPU ---
--- ---
--- 21.913758993148804 seconds ---3.0586464405059814 seconds ---
--- 22.379303455352783 seconds ---3.1384341716766357 seconds ---
--- 21.500431060791016 seconds ---2.9400241374969482 seconds ---
--- 21.292986392974854 seconds ---3.3738017082214355 seconds ---
--- 20.88358211517334 seconds ---3.388749599456787 seconds ---
I will give my code snippet in case I may be doing something wrong that cause GPU time to spike so high.
def loadYolo():
net = cv2.dnn.readNet("yolov4.weights", "yolov4.cfg")
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA_FP16)
classes = []
with open("coco.names", "r") as f:
classes = [line.strip() for line in f.readlines()]
layer_names = net.getLayerNames()
output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
return net,classes,layer_names,output_layers
#socketio.on('image')
def image(data_image):
sbuf = StringIO()
sbuf.write(data_image)
b = io.BytesIO(base64.b64decode(data_image))
if(str(data_image) == 'data:,'):
pass
else:
pimg = Image.open(b)
frame = cv2.cvtColor(np.array(pimg), cv2.COLOR_RGB2BGR)
frame = resize(frame, width=700)
frame = cv2.flip(frame, 1)
net,classes,layer_names,output_layers=loadYolo()
height, width, channels = frame.shape
blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416),
swapRB=True, crop=False)
net.setInput(blob)
outs = net.forward(output_layers)
print("--- %s seconds ---" % (time.time() - start_time))
class_ids = []
confidences = []
boxes = []
for out in outs:
for detection in out:
scores = detection[5:]
class_id = np.argmax(scores)
confidence = scores[class_id]
if confidence > 0.5:
# Object detected
center_x = int(detection[0] * width)
center_y = int(detection[1] * height)
w = int(detection[2] * width)
h = int(detection[3] * height)
# Rectangle coordinates
x = int(center_x - w / 2)
y = int(center_y - h / 2)
boxes.append([x, y, w, h])
confidences.append(float(confidence))
class_ids.append(class_id)
indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
font = cv2.FONT_HERSHEY_PLAIN
colors = np.random.uniform(0, 255, size=(len(classes), 3))
for i in range(len(boxes)):
if i in indexes:
x, y, w, h = boxes[i]
label = str(classes[class_ids[i]])
color = colors[class_ids[i]]
cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
cv2.putText(frame, label, (x, y + 30), font, 1, color, 2)
imgencode = cv2.imencode('.jpg', frame)[1]
stringData = base64.b64encode(imgencode).decode('utf-8')
b64_src = 'data:image/jpg;base64,'
stringData = b64_src + stringData
emit('response_back', stringData)
My Gpu is Nvidia 1050 Ti and my CPU is i5 gen 9 in case someone need the specification. Can someone please enlighten me as I am super confused right now? Thank you very much
EDIT 1: I tried to use cv2.dnn.DNN_TARGET_CUDA instead of cv2.dnn.DNN_TARGET_CUDA_FP16, but the time is still terrible compare to CPU. Below is the GPU result :
--- 10.91195559501648 seconds ---
--- 11.344025135040283 seconds ---
--- 11.754926204681396 seconds ---
--- 12.779674530029297 seconds ---
Below is CPU result :
--- 4.780993223190308 seconds ---
--- 4.910650253295898 seconds ---
--- 4.990436553955078 seconds ---
--- 5.246175050735474 seconds ---
it is still slower than CPU
EDIT 2: OpenCv is 4.5.0, CUDA 11.1 and CUDNN 8.0.1
You should definitely only load YOLO once. Recreating it for every image that comes through the socket is slow for both CPU and GPU, but GPU takes longer to initially load which is why you're seeing it run slower than CPU.
I don't understand what you mean by using an LRU cache for your YOLO model. Without seeing the rest of your code structure I can't make any real suggestions, but can you try at least temporarily putting the network into the global space just to see if it runs faster? (remove the function altogether and put its body in the global space)
something like this
net = cv2.dnn.readNet("yolov4.weights", "yolov4.cfg")
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA_FP16)
classes = []
with open("coco.names", "r") as f:
classes = [line.strip() for line in f.readlines()]
layer_names = net.getLayerNames()
output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
#socketio.on('image')
def image(data_image):
sbuf = StringIO()
sbuf.write(data_image)
b = io.BytesIO(base64.b64decode(data_image))
if(str(data_image) == 'data:,'):
pass
else:
pimg = Image.open(b)
frame = cv2.cvtColor(np.array(pimg), cv2.COLOR_RGB2BGR)
frame = resize(frame, width=700)
frame = cv2.flip(frame, 1)
height, width, channels = frame.shape
blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416),
swapRB=True, crop=False)
net.setInput(blob)
outs = net.forward(output_layers)
print("--- %s seconds ---" % (time.time() - start_time))
class_ids = []
confidences = []
boxes = []
for out in outs:
for detection in out:
scores = detection[5:]
class_id = np.argmax(scores)
confidence = scores[class_id]
if confidence > 0.5:
# Object detected
center_x = int(detection[0] * width)
center_y = int(detection[1] * height)
w = int(detection[2] * width)
h = int(detection[3] * height)
# Rectangle coordinates
x = int(center_x - w / 2)
y = int(center_y - h / 2)
boxes.append([x, y, w, h])
confidences.append(float(confidence))
class_ids.append(class_id)
indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
font = cv2.FONT_HERSHEY_PLAIN
colors = np.random.uniform(0, 255, size=(len(classes), 3))
for i in range(len(boxes)):
if i in indexes:
x, y, w, h = boxes[i]
label = str(classes[class_ids[i]])
color = colors[class_ids[i]]
cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
cv2.putText(frame, label, (x, y + 30), font, 1, color, 2)
imgencode = cv2.imencode('.jpg', frame)[1]
stringData = base64.b64encode(imgencode).decode('utf-8')
b64_src = 'data:image/jpg;base64,'
stringData = b64_src + stringData
emit('response_back', stringData)
From the previous two answer I manage to get the solution changing :
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA_FP16)
into :
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
have help to twice the GPU speed due to my GPU type is not compatible with FP16 this is thanks to Amir Karami and also despite Ian Chu answer did not solve my problem it give me basis to forcefully make all the images to only use one net instances this actually lower the processing time significantly from each needing 10 second into 0.03-0.04 seconds thus surpassing CPU speed by many times. The reason I did not accept both answer because neither really solve my problem but both become strong basis to my solution so I still upvote them. I just leave my answer here in case anyone encounter this problem like me.
DNN_TARGET_CUDA_FP16 refers to 16-bit floating-point. since your gpu is 1050 Ti, your gpu seems not works too well with FP16.you can check it from here and your compute capability from here.
i think you should change this line :
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA_FP16)
into :
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

Pytorch, slicing tensor causes RuntimeError:: one of the variables needed for gradient computation has been modified by an inplace operation:

I wrote a RNN with LSTM cell with Pycharm. The peculiarity of this network is that the output of the RNN is fed into a integration opeartion, computed with Runge-kutta.
The integration takes some input and propagate that in time one step ahead. In order to do so I need to slice the feature tensor X along the batch dimension, and pass this to the Runge-kutta.
class MyLSTM(torch.nn.Module):
def __init__(self, ni, no, sampling_interval, nh=10, nlayers=1):
super(MyLSTM, self).__init__()
self.device = torch.device("cpu")
self.dtype = torch.float
self.ni = ni
self.no = no
self.nh = nh
self.nlayers = nlayers
self.lstms = torch.nn.ModuleList(
[torch.nn.LSTMCell(self.ni, self.nh)] + [torch.nn.LSTMCell(self.nh, self.nh) for i in range(nlayers - 1)])
self.out = torch.nn.Linear(self.nh, self.no)
self.do = torch.nn.Dropout(p=0.2)
self.actfn = torch.nn.Sigmoid()
self.sampling_interval = sampling_interval
self.scaler_states = None
# Options
# description of the whole block
def forward(self, x, h0, train=False, integrate_ode=True):
x0 = x.clone().requires_grad_(True)
hs = x # initiate hidden state
if h0 is None:
h = torch.zeros(hs.shape[0], self.nh, device=self.device)
c = torch.zeros(hs.shape[0], self.nh, device=self.device)
else:
(h, c) = h0
# LSTM cells
for i in range(self.nlayers):
h, c = self.lstms[i](hs, (h, c))
if train:
hs = self.do(h)
else:
hs = h
# Output layer
# y = self.actfn(self.out(hs))
y = self.out(hs)
if integrate_ode:
p = y
y = self.integrate(x0, p)
return y, (h, c)
def integrate(self, x0, p):
# RK4 steps per interval
M = 4
DT = self.sampling_interval / M
X = x0
# X = self.scaler_features.inverse_transform(x0)
for b in range(X.shape[0]):
xx = X[b, :]
for j in range(M):
k1 = self.ode(xx, p[b, :])
k2 = self.ode(xx + DT / 2 * k1, p[b, :])
k3 = self.ode(xx + DT / 2 * k2, p[b, :])
k4 = self.ode(xx + DT * k3, p[b, :])
xx = xx + DT / 6 * (k1 + 2 * k2 + 2 * k3 + k4)
X_all[b, :] = xx
return X_all
def ode(self, x0, y):
# Here I a dynamic model
I get this error:
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor []], which is output 0 of SelectBackward, is at version 64; expected version 63 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).
the problem is in the operations xx = X[b, :] and p[b,:]. I know that because I choose batch dimension of 1, then I can replace the previous two equations with xx=X and p, and this works. How can split the tensor without loosing the gradient?
I had the same question, and after a lot of searching, I added .detach() function after "h" and "c" in the RNN cell.

Logistic Regression not able to find value of theta

I have hundred Entries in csv file.
Physics,Maths,Status_class0or1
30,40,0
90,70,1
Using above data i am trying to build logistic (binary) classifier.
Please advise me where i am doing wrong ? Why i am getting answer in 3*3 Matrix (9 values of theta, where as it should be 3 only)
Here is code:
importing the libraries
import numpy as np
import pandas as pd
from sklearn import preprocessing
reading data from csv file.
df = pd.read_csv("LogisticRegressionFirstBinaryClassifier.csv", header=None)
df.columns = ["Maths", "Physics", "AdmissionStatus"]
X = np.array(df[["Maths", "Physics"]])
y = np.array(df[["AdmissionStatus"]])
X = preprocessing.normalize(X)
X = np.c_[np.ones(X.shape[0]), X]
theta = np.ones((X.shape[1], 1))
print(X.shape) # (100, 3)
print(y.shape) # (100, 1)
print(theta.shape) # (3, 1)
calc_z to caculate dot product of X and theta
def calc_z(X,theta):
return np.dot(X,theta)
Sigmoid function
def sigmoid(z):
return 1 / (1 + np.exp(-z))
Cost_function
def cost_function(X, y, theta):
z = calc_z(X,theta)
h = sigmoid(z)
return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()
print("cost_function =" , cost_function(X, y, theta))
def derivativeofcostfunction(X, y, theta):
z = calc_z(X,theta)
h = sigmoid(z)
calculation = np.dot((h - y).T,X)
return calculation
print("derivativeofcostfunction=", derivativeofcostfunction(X, y, theta))
def grad_desc(X, y, theta, lr=.001, converge_change=.001):
cost = cost_function(X, y, theta)
change_cost = 1
num_iter = 1
while(change_cost > converge_change):
old_cost = cost
print(theta)
print (derivativeofcostfunction(X, y, theta))
theta = theta - lr*(derivativeofcostfunction(X, y, theta))
cost = cost_function(X, y, theta)
change_cost = old_cost - cost
num_iter += 1
return theta, num_iter
Here is the output :
[[ 0.4185146 -0.56877556 0.63999433]
[15.39722864 9.73995197 11.07882445]
[12.77277463 7.93485324 9.24909626]]
[[0.33944777 0.58199037 0.52493407]
[0.02106587 0.36300629 0.30297278]
[0.07040604 0.3969297 0.33737757]]
[[-0.05856159 -0.89826735 0.30849185]
[15.18035041 9.59004868 10.92827046]
[12.4804775 7.73302024 9.04599788]]
[[0.33950634 0.58288863 0.52462558]
[0.00588552 0.35341624 0.29204451]
[0.05792556 0.38919668 0.32833157]]
[[-5.17526527e-01 -1.21534937e+00 -1.03387571e-02]
[ 1.49729502e+01 9.44663458e+00 1.07843504e+01]
[ 1.21978140e+01 7.53778010e+00 8.84964495e+00]]
(array([[ 0.34002386, 0.58410398, 0.52463592],
[-0.00908743, 0.34396961, 0.28126016],
[ 0.04572775, 0.3816589 , 0.31948193]]), 46)
I changed this code , just added Transpose while returning the matrix and it fixed my issue.
def derivativeofcostfunction(X, y, theta):
z = calc_z(X,theta)
h = sigmoid(z)
calculation = np.dot((h - y).T,X)
return calculation.T

Resources