Normalizing images passed to the torchvision.transforms.Compose function - machine-learning

How to find the values to pass to the transforms.Normalize function in PyTorch? Also, where in my code, should I exactly do the transforms.Normalize?
Since normalizing a dataset is a pretty well-known task, I was hoping there would be some script that does it automatically, but I couldn't find one on the PyTorch forum.
transformed_dataset = MothLandmarksDataset(csv_file='moth_gt.csv',
root_dir='.',
transform=transforms.Compose([
Rescale(256),
RandomCrop(224),
transforms.Normalize(mean = [ 0.485, 0.456, 0.406 ],
std = [ 0.229, 0.224, 0.225 ]),
ToTensor()
]))
for i in range(len(transformed_dataset)):
sample = transformed_dataset[i]
print(i, sample['image'].size(), sample['landmarks'].size())
if i == 3:
break
I know these current values don't pertain to my dataset and pertain to ImageNet but using them I actually get an error:
TypeError Traceback (most recent call last)
<ipython-input-81-eb8dc46e0284> in <module>
10
11 for i in range(len(transformed_dataset)):
---> 12 sample = transformed_dataset[i]
13
14 print(i, sample['image'].size(), sample['landmarks'].size())
<ipython-input-48-9d04158922fb> in __getitem__(self, idx)
30
31 if self.transform:
---> 32 sample = self.transform(sample)
33
34 return sample
~/anaconda3/lib/python3.7/site-packages/torchvision/transforms/transforms.py in __call__(self, img)
59 def __call__(self, img):
60 for t in self.transforms:
---> 61 img = t(img)
62 return img
63
~/anaconda3/lib/python3.7/site-packages/torchvision/transforms/transforms.py in __call__(self, tensor)
210 Tensor: Normalized Tensor image.
211 """
--> 212 return F.normalize(tensor, self.mean, self.std, self.inplace)
213
214 def __repr__(self):
~/anaconda3/lib/python3.7/site-packages/torchvision/transforms/functional.py in normalize(tensor, mean, std, inplace)
278 """
279 if not torch.is_tensor(tensor):
--> 280 raise TypeError('tensor should be a torch tensor. Got {}.'.format(type(tensor)))
281
282 if tensor.ndimension() != 3:
TypeError: tensor should be a torch tensor. Got <class 'dict'>.
So basically three questions:
How can I find the similar values as in ImageNet mean and std for my own custom dataset?
How to pass these values and where? I assume I should do it in transforms.Compose method but I might be wrong.
I assume I should apply Normalize to my entire dataset not just the training set, am I right?
Update:
Trying the provided solution here didn't work for me: https://discuss.pytorch.org/t/about-normalization-using-pre-trained-vgg16-networks/23560/6?u=mona_jalal
mean = 0.
std = 0.
nb_samples = 0.
for data in dataloader:
print(type(data))
batch_samples = data.size(0)
data.shape(0)
data = data.view(batch_samples, data.size(1), -1)
mean += data.mean(2).sum(0)
std += data.std(2).sum(0)
nb_samples += batch_samples
mean /= nb_samples
std /= nb_samples
error is:
<class 'dict'>
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-51-e8ba3c8718bb> in <module>
5 for data in dataloader:
6 print(type(data))
----> 7 batch_samples = data.size(0)
8
9 data.shape(0)
AttributeError: 'dict' object has no attribute 'size'
this is print(data) result:
{'image': tensor([[[[0.2961, 0.2941, 0.2941, ..., 0.2460, 0.2456, 0.2431],
[0.2953, 0.2977, 0.2980, ..., 0.2442, 0.2431, 0.2431],
[0.2941, 0.2941, 0.2980, ..., 0.2471, 0.2471, 0.2448],
...,
[0.3216, 0.3216, 0.3216, ..., 0.2482, 0.2471, 0.2471],
[0.3216, 0.3241, 0.3253, ..., 0.2471, 0.2471, 0.2450],
[0.3216, 0.3216, 0.3216, ..., 0.2471, 0.2452, 0.2431]],
[[0.2961, 0.2941, 0.2941, ..., 0.2460, 0.2456, 0.2431],
[0.2953, 0.2977, 0.2980, ..., 0.2442, 0.2431, 0.2431],
[0.2941, 0.2941, 0.2980, ..., 0.2471, 0.2471, 0.2448],
...,
[0.3216, 0.3216, 0.3216, ..., 0.2482, 0.2471, 0.2471],
[0.3216, 0.3241, 0.3253, ..., 0.2471, 0.2471, 0.2450],
[0.3216, 0.3216, 0.3216, ..., 0.2471, 0.2452, 0.2431]],
[[0.2961, 0.2941, 0.2941, ..., 0.2460, 0.2456, 0.2431],
[0.2953, 0.2977, 0.2980, ..., 0.2442, 0.2431, 0.2431],
[0.2941, 0.2941, 0.2980, ..., 0.2471, 0.2471, 0.2448],
...,
[0.3216, 0.3216, 0.3216, ..., 0.2482, 0.2471, 0.2471],
[0.3216, 0.3241, 0.3253, ..., 0.2471, 0.2471, 0.2450],
[0.3216, 0.3216, 0.3216, ..., 0.2471, 0.2452, 0.2431]]],
[[[0.3059, 0.3093, 0.3140, ..., 0.3373, 0.3363, 0.3345],
[0.3059, 0.3093, 0.3165, ..., 0.3412, 0.3389, 0.3373],
[0.3098, 0.3131, 0.3176, ..., 0.3450, 0.3412, 0.3412],
...,
[0.2931, 0.2966, 0.2931, ..., 0.2549, 0.2539, 0.2510],
[0.2902, 0.2902, 0.2902, ..., 0.2510, 0.2510, 0.2502],
[0.2864, 0.2900, 0.2863, ..., 0.2510, 0.2510, 0.2510]],
[[0.3059, 0.3093, 0.3140, ..., 0.3373, 0.3363, 0.3345],
[0.3059, 0.3093, 0.3165, ..., 0.3412, 0.3389, 0.3373],
[0.3098, 0.3131, 0.3176, ..., 0.3450, 0.3412, 0.3412],
...,
[0.2931, 0.2966, 0.2931, ..., 0.2549, 0.2539, 0.2510],
[0.2902, 0.2902, 0.2902, ..., 0.2510, 0.2510, 0.2502],
[0.2864, 0.2900, 0.2863, ..., 0.2510, 0.2510, 0.2510]],
[[0.3059, 0.3093, 0.3140, ..., 0.3373, 0.3363, 0.3345],
[0.3059, 0.3093, 0.3165, ..., 0.3412, 0.3389, 0.3373],
[0.3098, 0.3131, 0.3176, ..., 0.3450, 0.3412, 0.3412],
...,
[0.2931, 0.2966, 0.2931, ..., 0.2549, 0.2539, 0.2510],
[0.2902, 0.2902, 0.2902, ..., 0.2510, 0.2510, 0.2502],
[0.2864, 0.2900, 0.2863, ..., 0.2510, 0.2510, 0.2510]]],
[[[0.2979, 0.2980, 0.3015, ..., 0.2825, 0.2784, 0.2784],
[0.2980, 0.2980, 0.2980, ..., 0.2830, 0.2764, 0.2795],
[0.2980, 0.2980, 0.3012, ..., 0.2827, 0.2814, 0.2797],
...,
[0.3282, 0.3293, 0.3294, ..., 0.2238, 0.2235, 0.2235],
[0.3255, 0.3255, 0.3255, ..., 0.2240, 0.2235, 0.2229],
[0.3225, 0.3255, 0.3255, ..., 0.2216, 0.2235, 0.2223]],
[[0.2979, 0.2980, 0.3015, ..., 0.2825, 0.2784, 0.2784],
[0.2980, 0.2980, 0.2980, ..., 0.2830, 0.2764, 0.2795],
[0.2980, 0.2980, 0.3012, ..., 0.2827, 0.2814, 0.2797],
...,
[0.3282, 0.3293, 0.3294, ..., 0.2238, 0.2235, 0.2235],
[0.3255, 0.3255, 0.3255, ..., 0.2240, 0.2235, 0.2229],
[0.3225, 0.3255, 0.3255, ..., 0.2216, 0.2235, 0.2223]],
[[0.2979, 0.2980, 0.3015, ..., 0.2825, 0.2784, 0.2784],
[0.2980, 0.2980, 0.2980, ..., 0.2830, 0.2764, 0.2795],
[0.2980, 0.2980, 0.3012, ..., 0.2827, 0.2814, 0.2797],
...,
[0.3282, 0.3293, 0.3294, ..., 0.2238, 0.2235, 0.2235],
[0.3255, 0.3255, 0.3255, ..., 0.2240, 0.2235, 0.2229],
[0.3225, 0.3255, 0.3255, ..., 0.2216, 0.2235, 0.2223]]]],
dtype=torch.float64), 'landmarks': tensor([[[160.2964, 98.7339],
[223.0788, 72.5067],
[ 82.4163, 70.3733],
[152.3213, 137.7867]],
[[198.3194, 74.4341],
[273.7188, 118.7733],
[117.7113, 80.8000],
[182.0750, 107.2533]],
[[137.4789, 92.8523],
[174.9463, 40.3467],
[ 57.3013, 59.1200],
[129.3375, 131.6533]]], dtype=torch.float64)}
dataloader = DataLoader(transformed_dataset, batch_size=3,
shuffle=True, num_workers=4)
and
transformed_dataset = MothLandmarksDataset(csv_file='moth_gt.csv',
root_dir='.',
transform=transforms.Compose(
[
Rescale(256),
RandomCrop(224),
ToTensor()#,
##transforms.Normalize(mean = [ 0.485, 0.456, 0.406 ],
## std = [ 0.229, 0.224, 0.225 ])
]
)
)
and
class MothLandmarksDataset(Dataset):
    """Moth landmarks dataset backed by a CSV annotation file.

    Each CSV row holds an image file name in its first column; the remaining
    columns are read as landmark coordinates and reshaped to an (N, 2) array.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample. It is called with the whole sample dict
                ({'image': ..., 'landmarks': ...}), not with the image alone.
        """
        self.landmarks_frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the annotation file."""
        return len(self.landmarks_frame)

    def __getitem__(self, idx):
        """Load the image and landmarks for row ``idx``.

        Returns:
            dict: ``{'image': ..., 'landmarks': ...}``, passed through
            ``self.transform`` when one was given.
        """
        # Samplers may hand over tensor indices; pandas needs plain Python ones.
        if torch.is_tensor(idx):
            idx = idx.tolist()
        img_name = os.path.join(self.root_dir, self.landmarks_frame.iloc[idx, 0])
        image = io.imread(img_name)
        landmarks = self.landmarks_frame.iloc[idx, 1:]
        landmarks = np.array([landmarks])
        # One row per landmark, two coordinates each.
        landmarks = landmarks.astype('float').reshape(-1, 2)
        sample = {'image': image, 'landmarks': landmarks}
        if self.transform:
            # The transform receives the dict so image and landmarks can be
            # modified together.
            sample = self.transform(sample)
        return sample

Source code errors
How to pass these values and where? I assume I should do it in
transforms.Compose method but I might be wrong.
In MothLandmarksDataset, it is no wonder this does not work: you are passing a dict (sample) to torchvision.transforms, which require either a torch.Tensor or a PIL.Image as input. Here, to be exact:
sample = {'image': image, 'landmarks': landmarks}
if self.transform:
sample = self.transform(sample)
You could pass sample["image"] into it although you shouldn't. Applying this operation only to sample["image"] would break its relation to landmarks. What you should be after is something like albumentations library (see here) which can transform image and landmarks in the same way to preserve their relations.
Also there is no Rescale transform in torchvision, maybe you meant Resize?
Mean and variance for normalization
Provided code is fine, but you have to unpack your data into torch.Tensor like this:
# Accumulate per-image, per-channel statistics over the whole loader,
# then average them over the number of images seen.
mean = 0.0
std = 0.0
nb_samples = 0.0
for data in dataloader:
    # Each batch is a dict; only the image tensor is used for the statistics.
    images, landmarks = data["image"], data["landmarks"]
    batch_samples = images.size(0)
    # Keep the first two dimensions and flatten everything after them, so
    # the reductions below run over all remaining positions of each image.
    images_data = images.view(batch_samples, images.size(1), -1)
    mean += images_data.mean(2).sum(0)
    # NOTE: this averages the per-image standard deviations; it is not the
    # standard deviation of all pixels pooled across the dataset.
    std += images_data.std(2).sum(0)
    nb_samples += batch_samples

mean /= nb_samples
std /= nb_samples
How to pass these values and where? I assume I should do it in
transforms.Compose method but I might be wrong.
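Those values should be passed to torchvision.transforms.Normalize, applied only to sample["image"], not to sample["landmarks"]. Normalize must also come after the conversion to a tensor (ToTensor) in the pipeline, because it only accepts torch tensors (see the TypeError in the traceback above).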
I assume I should apply Normalize to my entire dataset not just the
training set, am I right?
You should calculate normalization values across training dataset and apply those calculated values to validation and test as well.

Related

ValueError: If `preds` and `target` are of shape (N, ...) and `preds` are floats, `target` should be binary

I am using torchmetrics.functional to evaluate my trained model and I get this error. I have attached what my tensor values look like, and I believe I can make out the reason behind the error: my dataset includes non-binary values as labels. How do I work around this issue? I really appreciate your time.
Evaluation:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
trained_model = trained_model.to(device)
val_dataset = Dataset(
val_df,
tokenizer,
max_token_len=MAX_TOKEN_COUNT
)
predictions = []
labels = []
for item in tqdm(val_dataset):
_, prediction = trained_model(
item["input_ids"].unsqueeze(dim=0).to(device),
item["attention_mask"].unsqueeze(dim=0).to(device)
)
predictions.append(prediction.flatten())
labels.append(item["labels"].int())
predictions = torch.stack(predictions).detach().cpu()
labels = torch.stack(labels).detach().cpu()
Tensor Value:
tensor([[0.2794, 1.0000, 0.1865, ..., 0.0341, 0.0219, 0.8706],
[0.2753, 1.0000, 0.1864, ..., 0.0352, 0.0218, 0.8693],
[0.2747, 1.0000, 0.1858, ..., 0.0421, 0.0227, 0.8290],
...,
[0.2729, 1.0000, 0.1879, ..., 0.0430, 0.0231, 0.8263],
[0.2835, 1.0000, 0.1814, ..., 0.0363, 0.0215, 0.8570],
[0.2734, 1.0000, 0.1881, ..., 0.0430, 0.0232, 0.8277]])
tensor([[0, 2, 0, ..., 0, 0, 0],
[0, 3, 0, ..., 0, 0, 0],
[0, 1, 0, ..., 0, 0, 1],
...,
[0, 2, 0, ..., 0, 0, 1],
[0, 2, 0, ..., 0, 0, 2],
[0, 1, 1, ..., 0, 0, 1]], dtype=torch.int32)
accuracy(predictions, labels, threshold=THRESHOLD)
ValueError: If preds and target are of shape (N, ...) and preds are floats, target should be binary.

Multiple dimensionality reduction techniques with pipeline and GridSearchCV

we all know the common approach to define a pipeline with a dimensionality reduction technique and then a model for training and testing. Then we can apply the GridSearchCv for hyperparameter tuning.
# Grid-search a PCA -> random-forest pipeline over both steps' parameters.
grid = GridSearchCV(
    Pipeline([
        ('reduce_dim', PCA()),
        ('classify', RandomForestClassifier(n_jobs=-1))
    ]),
    param_grid=[
        {
            # range() only accepts integers, so the fractional values are
            # listed explicitly (same half-open span as 0.7 to 0.9, step 0.1).
            'reduce_dim__n_components': [0.7, 0.8],
            'classify__n_estimators': range(10, 50, 5),
            'classify__max_features': ['auto', 0.2],
            'classify__min_samples_leaf': [40, 50, 60],
            'classify__criterion': ['gini', 'entropy']
        }
    ],
    cv=5, scoring='f1')
grid.fit(X, y)
I can understand the above code.
Now, I was going through the documentation today and found a piece of code that looks a little strange to me.
pipe = Pipeline([
# the reduce_dim stage is populated by the param_grid
('reduce_dim', 'passthrough'), # How does this work??
('classify', LinearSVC(dual=False, max_iter=10000))
])
N_FEATURES_OPTIONS = [2, 4, 8]
C_OPTIONS = [1, 10, 100, 1000]
param_grid = [
{
'reduce_dim': [PCA(iterated_power=7), NMF()],
'reduce_dim__n_components': N_FEATURES_OPTIONS, ### No PCA is used..??
'classify__C': C_OPTIONS
},
{
'reduce_dim': [SelectKBest(chi2)],
'reduce_dim__k': N_FEATURES_OPTIONS,
'classify__C': C_OPTIONS
},
]
reducer_labels = ['PCA', 'NMF', 'KBest(chi2)']
grid = GridSearchCV(pipe, n_jobs=1, param_grid=param_grid)
X, y = load_digits(return_X_y=True)
grid.fit(X, y)
First of all while defining a pipeline, it used a string 'passthrough' instead of a object.
('reduce_dim', 'passthrough'),
Then while defining different dimensionality reduction technique for the grid search, it used a different strategy. How does [PCA(iterated_power=7), NMF()] this work ?
'reduce_dim': [PCA(iterated_power=7), NMF()],
'reduce_dim__n_components': N_FEATURES_OPTIONS, # here
Please Someone explain the code to me .
Solved - in one line, the order is ['PCA', 'NMF', 'KBest(chi2)']
Courtesy of - seralouk (see answer below)
For Reference If someone looks for more details
1 2 3
It is equivalent as far as I know.
In the documentation you have this:
pipe = Pipeline([
# the reduce_dim stage is populated by the param_grid
('reduce_dim', 'passthrough'),
('classify', LinearSVC(dual=False, max_iter=10000))
])
N_FEATURES_OPTIONS = [2, 4, 8]
C_OPTIONS = [1, 10, 100, 1000]
param_grid = [
{
'reduce_dim': [PCA(iterated_power=7), NMF()],
'reduce_dim__n_components': N_FEATURES_OPTIONS,
'classify__C': C_OPTIONS
},
{
'reduce_dim': [SelectKBest(chi2)],
'reduce_dim__k': N_FEATURES_OPTIONS,
'classify__C': C_OPTIONS
},
]
Initially we have ('reduce_dim', 'passthrough'), and then 'reduce_dim': [PCA(iterated_power=7), NMF()]
The definition of the PCA is done in the second line.
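You could alternatively make PCA the default step of the pipeline (note that this variant no longer includes NMF in the search, because the first grid does not override 'reduce_dim'):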
pipe = Pipeline([
# the reduce_dim stage is populated by the param_grid
('reduce_dim', PCA(iterated_power=7)),
('classify', LinearSVC(dual=False, max_iter=10000))
])
N_FEATURES_OPTIONS = [2, 4, 8]
C_OPTIONS = [1, 10, 100, 1000]
param_grid = [
{
'reduce_dim__n_components': N_FEATURES_OPTIONS,
'classify__C': C_OPTIONS
},
{
'reduce_dim': [SelectKBest(chi2)],
'reduce_dim__k': N_FEATURES_OPTIONS,
'classify__C': C_OPTIONS
},
]

Memory error while loading very large data from h5 file on a cluster

I am running into a MemoryError when I attempt to load a very large dataset from an hdf5 file. I have attached a short example below.
import dask
import dask.array as da
import h5py
from dask.distributed import Client
client = Client('tcp://10.11.69.71:44393')
handle = h5py.File('h5_file.h5', 'r') # matrix size: (4500, 6291456)
a = da.from_array(handle['_data'], chunks='auto') # matrix size: (6291456, 128)
st1 = da.random.random((a.shape[1], 128))
st = client.run(start)
res = da.matmul(a, st1)
res.compute()
this results in the following error:
distributed.worker - WARNING - Compute Failed
Function: execute_task
args: ((subgraph_callable, (<function concatenate_axes at 0x2b85d304a0d0>, [array([[ 42., 50., 5., ..., 168., 203., 214.],
[129., 159., 0., ..., 187., 153., 136.],
[ 0., 0., 0., ..., 228., 209., 204.],
...,
[ 18., 28., 13., ..., 255., 227., 218.],
[ 79., 86., 61., ..., 53., 64., 55.],
[ 42., 76., 106., ..., 101., 35., 20.]], dtype=float32), array([[ 50., 60., 33., ..., 169., 204., 215.],
[ 24., 111., 0., ..., 185., 151., 133.],
[ 0., 0., 0., ..., 226., 207., 202.],
...,
[ 17., 23., 14., ..., 255., 228., 219.],
[111., 120., 101., ..., 53., 64., 55.],
[ 85., 98., 90., ..., 100., 37., 22.]], dtype=float32), array([[ 65., 61., 35., ..., 170., 205., 215.],
[215., 237., 214., ..., 184., 149., 131.],
[ 49., 42., 21., ..., 223., 205., 200.],
...,
[ 16., 20., 11., ..., 255., 229., 220.],
[ 85., 85., 69., ..., 53., 64., 54.],
[ 6
kwargs: {}
Exception: MemoryError()
Am I loading the data incorrectly? I have tried to use result as well to no avail.
PS I am using dask-mpi to create my client
Note that by calling .compute you are asking for the output of your computation to be returned to you as a single, in-memory, numpy array.
If your output is very large then you might instead want to save it to a file, using a function like to_hdf5.

Why are all of my results from SVM the same in scikit learn?

I'm trying to calculate probabilities for a multi-class dataset using scikit learn. However, for some reason, I'm getting the same probabilities for every example. Any idea what's happening? Does this have to do with my model, my use of the library, or something else? Appreciate any help!
svm_model = svm.SVC(probability=True, kernel='rbf',C=1, decision_function_shape='ovr', gamma=0.001,verbose=100)
svm_model.fit(train_X,train_y)
preds= svm_model.predict_proba(test_X)
train_X looks like this
array([[2350, 5550, 2750.0, ..., 23478, 1, 3],
[2500, 5500, 3095.5, ..., 23674, 0, 3],
[3300, 6900, 3600.0, ..., 6529, 0, 3],
...,
[2150, 6175, 2500.0, ..., 11209, 0, 3],
[2095, 5395, 2595.4, ..., 10070, 0, 3],
[1650, 2850, 2000.0, ..., 25463, 1, 3]], dtype=object)
train_y looks like this
0 1
1 2
10 2
100 2
1000 2
10000 2
10001 2
10002 2
10003 2
10004 2
10005 2
10006 2
10007 2
10008 1
10009 1
1001 2
10010 2
test_X looks like this
array([[2190, 3937, 2200.5, ..., 24891, 1, 5],
[2695, 7000, 2850.0, ..., 5491, 1, 4],
[2950, 12000, 4039.5, ..., 22367, 0, 4],
...,
[2850, 5200, 3000.0, ..., 15576, 1, 1],
[3200, 16000, 4100.0, ..., 1320, 0, 3],
[2100, 3750, 2400.0, ..., 6022, 0, 1]], dtype=object)
My results look like
array([[ 0.07819139, 0.22727628, 0.69453233],
[ 0.07819139, 0.22727628, 0.69453233],
[ 0.07819139, 0.22727628, 0.69453233],
...,
[ 0.07819139, 0.22727628, 0.69453233],
[ 0.07819139, 0.22727628, 0.69453233],
[ 0.07819139, 0.22727628, 0.69453233]])
Start with preprocessing!
It's very important to standardize your data to zero-mean and unit-variance.
The scikit-learn docs say this:
Support Vector Machine algorithms are not scale invariant, so it is highly recommended to scale your data. For example, scale each attribute on the input vector X to [0,1] or [-1,+1], or standardize it to have mean 0 and variance 1. Note that the same scaling must be applied to the test vector to obtain meaningful results. See section Preprocessing data for more details on scaling and normalization
sklearns Section on Preprocessing
sklearns StandardScaler.
The next step after this is parameter tuning (C, gamma and co.). This is usually done with a grid search. But I usually expect people to try a simple linear SVM first, before trying the kernel SVM (fewer hyper-parameters, less computation time, better generalization when the parameters are not chosen optimally).

Cost-sensitive learning in Tensorflow

I am trying to set up a cost-sensitive binary classification learning in TensorFlow, which would put different penalties on false positives and false negatives. Does anyone know how to create a loss function from a set of penalty weights $(w_1, w_2, w_3, w_4)$ for (true positive, false positive, false negative, true negative).
I went over the standard cost functions offered, but can't figure out how to combine them to get something similar to the above.
Following @Cauchyzhou's answer, if you have the logits, and the sparse labels as well as a cost_matrix whose shape is [L, L], where L is the number of unique labels, you can simply use the function below to calculate the loss
def sparse_cost_sensitive_loss(logits, labels, cost_matrix):
    """Cost-sensitive loss for sparse (integer) labels.

    For every example, the row of ``cost_matrix`` selected by its label is
    multiplied element-wise with ``log(1 - softmax(logits))``; the negated
    per-example sums are then averaged over the batch.

    Args:
        logits: Unnormalized class scores; softmax is applied here.
        labels: Integer class indices used to look up rows of ``cost_matrix``.
        cost_matrix: Matrix of shape [L, L], L being the number of labels.

    Returns:
        The mean cost-weighted loss over the batch.
    """
    # Row labels[i] of the cost matrix holds the per-class costs of example i.
    batch_cost_matrix = tf.nn.embedding_lookup(cost_matrix, labels)
    # Keep probabilities away from 0 and 1 so the logarithm stays finite.
    eps = 1e-6
    probability = tf.clip_by_value(tf.nn.softmax(logits), eps, 1 - eps)
    # tf.math.log replaces tf.log, which no longer exists in TensorFlow 2.x.
    cost_values = tf.math.log(1 - probability) * batch_cost_matrix
    loss = tf.reduce_mean(-tf.reduce_sum(cost_values, axis=1))
    return loss
I am not aware of anyone who has built a cost sensitive neural network classifier but Alejandro Correa Bahnsen has published academic papers for cost sensitive logistic regression and cost sensitive decision trees and a very well documented python cost sensitive classification library named CostCla. CostCla is pretty easy to use if you are familiar with scikit-learn.
You should be able to use the Bayes minimum risk model in the library to minimize the cost of your neural network since it fits a cost model to output prediction probabilities of any classifier.
Note that CostCla is intended to work with potentially different costs for each sample. You give it a cost matrix for your training and test samples. However, you can just make all the rows in the cost matrix the same if that applies to your problem.
Here are a couple of additional academic papers on the subject:
The Foundations of Cost-Sensitive Learning
Optimal ROC Curve for a Combination of Classifiers
cost_matrix:
[[0,1,100],
[1,0,1],
[1,20,0]]
label:
[1,2]
y*:
[[0,1,0],
[0,0,1]]
y(prediction):
[[0.2,0.3,0.5],
[0.1,0.2,0.7]]
label,cost_matrix-->cost_embedding:
[[1,0,1],
[1,20,0]]
Obviously, 0.3 in [0.2,0.3,0.5] is the probability of the correct label in [0,1,0], so it should not contribute to the loss.
The same holds for 0.7 in [0.1,0.2,0.7]. In other words, the positions with value 1 in y* do not contribute to the loss.
So I have (1-y*):
[[1,0,1],
[1,1,0]]
The cross-entropy is target*log(predict) + (1-target)*log(1-predict). For the positions where y* is 0, only the (1-target)*log(1-predict) term applies, so I use (1-predict), written (1-y) below.
1-y:
[[0.8,*0.7*,0.5],
[0.9,0.8,*0.3*]]
(the numbers marked with asterisks are not used)
the custom loss is
[[1,0,1], [1,20,0]] * log([[0.8,0.7,0.5],[0.9,0.8,0.3]]) *
[[1,0,1],[1,1,0]]
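and you can see that the (1-y*) factor can be dropped here, because cost_embedding is already 0 at the position of the correct label,
so the loss is -tf.reduce_mean(cost_embedding*log(1-y))
and, to keep the logarithm finite in practice, it should be:
-tf.reduce_mean(cost_embedding*log(tf.clip_by_value(1-y, 1e-10, 1.0)))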
the demo is below
import tensorflow as tf
import numpy as np
hidden_units = 50
num_class = 3
class Model():
    """Single fully connected softmax classifier used to compare two losses.

    Written against the TensorFlow 1.x graph API (placeholders, variable
    scopes). Reads the module-level ``hidden_units`` and ``num_class``.
    """

    def __init__(self, name_scope, is_custom):
        """Build inputs, weights, logits, predictions, loss and train op.

        Args:
            name_scope: Prefix of the variable scope holding the weights.
            is_custom: If true, train with the cost-sensitive loss; otherwise
                with sparse softmax cross-entropy.
        """
        self.name_scope = name_scope
        self.is_custom = is_custom
        self.input_x = tf.placeholder(tf.float32, [None, hidden_units])
        self.input_y = tf.placeholder(tf.int32, [None])
        self.instantiate_weights()
        self.logits = self.inference()
        self.predictions = tf.argmax(self.logits, axis=1)
        self.losses, self.train_op = self.opitmizer()

    def instantiate_weights(self):
        """Create the dense-layer variables and the constant cost matrix."""
        with tf.variable_scope(self.name_scope + 'FC'):
            self.W = tf.get_variable('W', [hidden_units, num_class])
            self.b = tf.get_variable('b', [num_class])
        # Row = true label, column = predicted class.
        self.cost_matrix = tf.constant(
            np.array([[0, 1, 100], [1, 0, 100], [20, 5, 0]]),
            dtype=tf.float32
        )

    def inference(self):
        """Return the logits of the dense layer."""
        return tf.matmul(self.input_x, self.W) + self.b

    def opitmizer(self):
        """Build the loss selected by ``is_custom`` and its Adam train op.

        Returns:
            Tuple ``(loss, train_op)``; the loss is not reduced to a scalar.
        """
        if not self.is_custom:
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.input_y, logits=self.logits)
        else:
            # Per-example cost row, selected by the true label.
            batch_cost_matrix = tf.nn.embedding_lookup(
                self.cost_matrix, self.input_y
            )
            # Clip before the logarithm: a class probability of exactly 1
            # would otherwise give log(0) = -inf.
            wrong_prob = tf.clip_by_value(
                1 - tf.nn.softmax(self.logits), 1e-10, 1.0
            )
            loss = - tf.log(wrong_prob) * batch_cost_matrix
        train_op = tf.train.AdamOptimizer().minimize(loss)
        return loss, train_op
import random
# Train the baseline and the cost-sensitive model side by side on the same
# random batches. `dataset_size`, `datasets` and `lables` are not defined in
# this snippet and must be provided by the surrounding code.
batch_size = 128
norm_model = Model('norm', False)
custom_model = Model('cost', True)
split_point = int(0.9 * dataset_size)
train_set = datasets[:split_point]
test_set = datasets[split_point:]
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(100):
        batch_index = random.sample(range(split_point), batch_size)
        train_batch = train_set[batch_index]
        train_labels = lables[batch_index]
        _, eval_predict, eval_loss = sess.run(
            [norm_model.train_op, norm_model.predictions, norm_model.losses],
            feed_dict={
                norm_model.input_x: train_batch,
                norm_model.input_y: train_labels
            })
        _, eval_predict1, eval_loss1 = sess.run(
            [custom_model.train_op, custom_model.predictions,
             custom_model.losses],
            feed_dict={
                custom_model.input_x: train_batch,
                custom_model.input_y: train_labels
            })
        # Number of correct predictions in this batch: baseline, then custom.
        # Summing the boolean mask counts the matches directly, which avoids
        # the removed np.int alias.
        print(np.sum(eval_predict == train_labels),
              np.sum(eval_predict1 == train_labels))
        if i % 10 == 0:
            # Every 10th step, print both models' predictions on the test
            # split (first the default loss, then the custom loss).
            print('默认测试', sess.run(
                norm_model.predictions,
                feed_dict={
                    norm_model.input_x: test_set,
                    norm_model.input_y: lables[split_point:]
                }))
            print('自定义测试', sess.run(
                custom_model.predictions,
                feed_dict={
                    custom_model.input_x: test_set,
                    custom_model.input_y: lables[split_point:]
                }))
Here is another solution, in which you can take any TensorFlow loss and make it cost-sensitive through its weights argument. Note that, unlike in most of the cases above, you need to use a cost of '1' instead of '0' where you want to keep the loss as it is.
Some advantages of this approach are:
it extends tf.losses.Loss and satisfies the call api
reduction kwarg of the original loss remains functional and the behaviour is propagated to CostSensitiveLoss
you can also pass your own extra weights to new loss instances. Note that internally generated weights are used by wrapped self.loss
import numpy as np
from keras.api._v2 import keras as tk
import tensorflow as tf
from keras.utils import losses_utils
import typing as t
class CostSensitiveLoss(tk.losses.Loss):
    """Wrap a Keras loss and weight every sample by a cost-matrix term.

    For each sample, the row of the cost matrix selected by the true class is
    multiplied with the predicted values and summed; that scalar is handed to
    the wrapped loss as its third argument. A cost of 1 everywhere therefore
    leaves the weighting to the sum of the predictions alone.
    """

    def __init__(
        self,
        cost_matrix: t.List, loss: tk.losses.Loss,
    ):
        """
        Args:
            cost_matrix: Nested list; row = true class, column = predicted.
            loss: The loss instance to wrap. Its ``reduction`` and ``name``
                are reused for this loss.
        """
        super().__init__(reduction=loss.reduction, name=loss.name)
        self.loss = loss
        # Keep the plain list for get_config and a tensor copy for lookups.
        self.cost_matrix = cost_matrix
        self._cost_matrix = tf.constant(cost_matrix, dtype=tf.float32)

    @classmethod
    def from_config(cls, config):
        """Rebuild an instance from the dict produced by ``get_config``."""
        # Work on a copy so the caller's dict is left untouched.
        config = dict(config)
        config['loss'] = tk.losses.deserialize(config['loss'])
        # __init__ takes reduction and name from the wrapped loss and does
        # not accept them as keyword arguments.
        config.pop('reduction', None)
        config.pop('name', None)
        return cls(**config)

    def get_config(self):
        """Return a serializable description of this loss."""
        return {
            'cost_matrix': self.cost_matrix,
            'loss': tk.losses.serialize(self.loss),
            'reduction': self.reduction, 'name': self.name
        }

    def call(self, y_true, y_pred):
        """Compute the wrapped loss with cost-derived per-sample weights.

        Raises:
            ValueError: If ``y_true`` is neither 1-D (class indices) nor 2-D
                (one-hot encoded).
        """
        # if y_true is one hot encoded then get integer indices
        # NOTE(review): relies on y_true exposing `.ndim`; confirm that this
        # holds for the tensor types Keras passes in.
        if y_true.ndim == 1:
            y_true_index = y_true
        elif y_true.ndim == 2:
            y_true_index = tf.argmax(y_true, axis=1)
        else:
            raise ValueError(f"`y_true.ndim` {y_true.ndim} not supported")
        # get cost for batch
        cost_for_batch = tf.nn.embedding_lookup(self._cost_matrix, y_true_index)
        cost_for_batch *= y_pred
        cost_for_batch = tf.reduce_sum(cost_for_batch, axis=1)
        # get loss
        return self.loss(y_true, y_pred, cost_for_batch)
if __name__ == '__main__':
    # 'none' is kept for debugging purposes; other reduction options such as
    # 'sum' or 'auto' can be used safely as well
    _loss = tk.losses.MeanAbsoluteError(reduction='none')

    # Three cost matrices; the first one (all ones) is the case where no
    # cost-sensitive weights are used
    _cs_loss_1 = CostSensitiveLoss(
        cost_matrix=[[1, 1, 1], [1, 1, 1], [1, 1, 1]],
        loss=_loss,
    )
    _cs_loss_2 = CostSensitiveLoss(
        cost_matrix=[[1, 2, 2], [4, 1, 4], [8, 8, 1]],
        loss=_loss,
    )
    _cs_loss_3 = CostSensitiveLoss(
        cost_matrix=[[1, 4, 8], [2, 1, 8], [2, 4, 1]],
        loss=_loss,
    )

    # One-hot targets: the three classes in order, repeated three times
    _y_true = np.asarray([
        [1, 0, 0], [0, 1, 0], [0, 0, 1],
        [1, 0, 0], [0, 1, 0], [0, 0, 1],
        [1, 0, 0], [0, 1, 0], [0, 0, 1],
    ])
    # Predictions: 0.8 on one class and 0.1 on each of the other two
    _y_pred = np.asarray([
        [0.8, 0.1, 0.1], [0.1, 0.8, 0.1], [0.1, 0.1, 0.8],
        [0.1, 0.8, 0.1], [0.1, 0.1, 0.8], [0.8, 0.1, 0.1],
        [0.1, 0.1, 0.8], [0.8, 0.1, 0.1], [0.1, 0.8, 0.1],
    ])

    # Print a heading followed by the values of each loss, in turn
    _reports = (
        ("loss ........................", _loss),
        ("cs_loss_1 ...................", _cs_loss_1),
        ("cs_loss_2 ...................", _cs_loss_2),
        ("cs_loss_3 ...................", _cs_loss_3),
    )
    for _heading, _loss_fn in _reports:
        print(_heading)
        print(_loss_fn(_y_true, _y_pred).numpy())

Resources