Part of requested market data is not subscribed-ib_insync - interactive-brokers

I have a problem with getting option parameters using the library ib_insync.
import time
from ib_insync import *
import pandas as pd
from configparser import ConfigParser
from ibapi.common import TickerId, SetOfFloat, SetOfString, MarketDataTypeEnum
config = ConfigParser()
# TWs 7497, IBGW 4001
def get_chain(ib,ticker, exp_list):
exps = {}
df = pd.DataFrame(columns=['strike', 'kind', 'close', 'last'])
for i in exp_list:
ib.sleep()
cds = ib.reqContractDetails(Option(ticker, i, exchange='SMART'))
options = [cd.contract for cd in cds]
tickers = [t for i in range(0, len(options), 100)
for t in ib.reqTickers(*options[i:i + 100])]
for x in tickers:
df = df.append(
{'strike': x.contract.strike, 'kind': x.contract.right, 'close': x.close, 'last': x.last, 'bid': x.bid,
'ask': x.ask, 'mid': (x.bid + x.ask) / 2, 'volume': x.volume}, ignore_index=True)
exps[i] = df
return exps
def get_individual(ib,ticker, exp, strike, kind):
cds = ib.reqContractDetails(Option(ticker, exp, strike, kind, exchange='SMART'))
options = [cd.contract for cd in cds]
tickers = [t for i in range(0, len(options), 100) for t in ib.reqTickers(*options[i:i + 100])]
con = {'strike': tickers[0].contract.strike, 'kind': tickers[0].contract.right, 'close': tickers[0].close,
'last': tickers[0].last, 'bid': tickers[0].bid, 'ask': tickers[0].ask, 'volume': tickers[0].volume}
return con
def main():
with IB().connect('127.0.0.1', 7497) as ib:
ib.reqMarketDataType(3)
time.sleep(1)
print(get_chain(ib,"AAPL", ["20220211"]))
if __name__ == '__main__':
main()
Output: "Error 10090, reqId 4: Part of requested market data is not subscribed. Subscription-independent ticks are still active.Delayed market data is available.AAPL NASDAQ.NMS/TOP/ALL"
My solution was to implement ib.sleep because I thought maybe the calls are overlapping. As you can see the MarketDataType is set to 3.
In the end a table with strikes and all other parameters are NaN. Sometimes the first row gets some values, because of this I think it is some kind of overlapping problem.
Sorry if my question is to long.

Related

cvxpy infeasible error with different number of samples

Does anyone know why cvxpy throws an infeasible error when I change the number of samples in constrained OLS? I'm attaching code to re-create my issue. n=100000 is fine, but n=400000 fails.
import cvxpy as cp
import numpy as np
class constrained_ols:
def __init__ (self, xdim=6):
self.xdim = xdim
return None
def fit(self, x, y):
import cvxpy as cp
w = cp.Variable(self.xdim)
i = cp.Variable()
quad_prog = cp.Minimize(cp.sum_squares(y-(x#w+i)))
cons = [w>=0, cp.sum(w)<=1.02, cp.sum(w)>=.98]
problem = cp.Problem(quad_prog, cons)
problem.solve()
print(problem.status)
self.coef_ = w.value
self.intercept_ = i.value
def predict(self, x):
return x # self.coef_
n = 100000
x = np.random.normal(0,1,(n,10))
y = np.random.normal(0,1,n)
model=constrained_ols(xdim=10)
model.fit(x,y)
model.coef_,model.intercept_
I was expecting to get a vector of 10 coefficients and an intercept but instead I got none values.

When is one supposed to run wandb.watch so that weights and biases tracks params and gradients properly?

I was trying out the wandb library and I run wandb.watch but that doesn't seem to work on my code. It's not supposed to be anything to complicated so I am puzzled why it's not working.
Code:
"""
https://docs.wandb.ai/guides/track/advanced/distributed-training
import wandb
# 1. Start a new run
wandb.init(project='playground', entity='brando')
# 2. Save model inputs and hyperparameters
config = wandb.config
config.learning_rate = 0.01
# 3. Log gradients and model parameters
wandb.watch(model)
for batch_idx, (data, target) in enumerate(train_loader):
...
if batch_idx % args.log_interval == 0:
# 4. Log metrics to visualize performance
wandb.log({"loss": loss})
Notes:
- call wandb.init and wandb.log only from the leader process
"""
from argparse import Namespace
from pathlib import Path
from typing import Union
import torch
from torch import nn
from torch.nn.functional import mse_loss
from torch.optim import Optimizer
import uutils
from uutils.torch_uu import r2_score_from_torch
from uutils.torch_uu.distributed import is_lead_worker
from uutils.torch_uu.models import get_simple_model
from uutils.torch_uu.tensorboard import log_2_tb_supervisedlearning
import wandb
def log_2_wandb_nice(it, loss, inputs, outputs, captions):
wandb.log({"loss": loss, "epoch": it,
"inputs": wandb.Image(inputs),
"logits": wandb.Histogram(outputs),
"captions": wandb.HTML(captions)})
def log_2_wandb(**metrics):
""" Log to wandb """
new_metrics: dict = {}
for key, value in metrics.items():
key = str(key).strip('_')
new_metrics[key] = value
wandb.log(new_metrics)
def log_train_val_stats(args: Namespace,
it: int,
train_loss: float,
train_acc: float,
valid,
log_freq: int = 10,
ckpt_freq: int = 50,
force_log: bool = False, # e.g. at the final it/epoch
save_val_ckpt: bool = False,
log_to_tb: bool = False,
log_to_wandb: bool = False
):
"""
log train and val stats.
Note: Unlike save ckpt, this one does need it to be passed explicitly (so it can save it in the stats collector).
"""
from uutils.torch_uu.tensorboard import log_2_tb
from matplotlib import pyplot as plt
# - is it epoch or iteration
it_or_epoch: str = 'epoch_num' if args.training_mode == 'epochs' else 'it'
# if its
total_its: int = args.num_empochs if args.training_mode == 'epochs' else args.num_its
print(f'-- {it == total_its - 1}')
print(f'-- {it}')
print(f'-- {total_its}')
if (it % log_freq == 0 or is_lead_worker(args.rank) or it == total_its - 1 or force_log) and is_lead_worker(args.rank):
print('inside log')
# - get eval stats
val_loss, val_acc = valid(args, args.mdl, save_val_ckpt=save_val_ckpt)
# - print
args.logger.log('\n')
args.logger.log(f"{it_or_epoch}={it}: {train_loss=}, {train_acc=}")
args.logger.log(f"{it_or_epoch}={it}: {val_loss=}, {val_acc=}")
# - record into stats collector
args.logger.record_train_stats_stats_collector(it, train_loss, train_acc)
args.logger.record_val_stats_stats_collector(it, val_loss, val_acc)
args.logger.save_experiment_stats_to_json_file()
fig = args.logger.save_current_plots_and_stats()
# - log to wandb
if log_to_wandb:
# if it == 0:
# # -- todo why isn't this working?
# wandb.watch(args.mdl)
# print('watching model')
# log_2_wandb(train_loss=train_loss, train_acc=train_acc)
print('inside wandb log')
wandb.log(data={'train loss': train_loss, 'train acc': train_acc, 'val loss': val_loss, 'val acc': val_acc}, step=it)
wandb.log(data={'it': it}, step=it)
if it == total_its - 1:
print(f'logging fig at {it=}')
wandb.log(data={'fig': fig}, step=it)
plt.close('all')
# - log to tensorboard
if log_to_tb:
log_2_tb_supervisedlearning(args.tb, args, it, train_loss, train_acc, 'train')
log_2_tb_supervisedlearning(args.tb, args, it, train_loss, train_acc, 'val')
# log_2_tb(args, it, val_loss, val_acc, 'train')
# log_2_tb(args, it, val_loss, val_acc, 'val')
# - log ckpt
if (it % ckpt_freq == 0 or it == total_its - 1 or force_log) and is_lead_worker(args.rank):
save_ckpt(args, args.mdl, args.optimizer)
def save_ckpt(args: Namespace, mdl: nn.Module, optimizer: torch.optim.Optimizer,
dirname: Union[None, Path] = None, ckpt_name: str = 'ckpt.pt'):
"""
Saves checkpoint for any worker.
Intended use is to save by worker that got a val loss that improved.
"""
import dill
dirname = args.log_root if (dirname is None) else dirname
# - pickle ckpt
assert uutils.xor(args.training_mode == 'epochs', args.training_mode == 'iterations')
pickable_args = uutils.make_args_pickable(args)
torch.save({'state_dict': mdl.state_dict(),
'epoch_num': args.epoch_num,
'it': args.it,
'optimizer': optimizer.state_dict(),
'args': pickable_args,
'mdl': mdl},
pickle_module=dill,
f=dirname / ckpt_name) # f'mdl_{epoch_num:03}.pt'
def get_args() -> Namespace:
args = uutils.parse_args_synth_agent()
# we can place model here...
args = uutils.setup_args_for_experiment(args)
return args
def valid_for_test(args: Namespace, mdl: nn.Module, save_val_ckpt: bool = False):
import torch
for t in range(1):
x = torch.randn(args.batch_size, 5)
y = (x ** 2 + x + 1).sum(dim=1)
y_pred = mdl(x).squeeze(dim=1)
val_loss, val_acc = mse_loss(y_pred, y), r2_score_from_torch(y_true=y, y_pred=y_pred)
if val_loss.item() < args.best_val_loss and save_val_ckpt:
args.best_val_loss = val_loss.item()
save_ckpt(args, args.mdl, args.optimizer, ckpt_name='ckpt_best_val.pt')
return val_loss, val_acc
def train_for_test(args: Namespace, mdl: nn.Module, optimizer: Optimizer, scheduler=None):
# wandb.watch(args.mdl)
for it in range(args.num_its):
x = torch.randn(args.batch_size, 5)
y = (x ** 2 + x + 1).sum(dim=1)
y_pred = mdl(x).squeeze(dim=1)
train_loss, train_acc = mse_loss(y_pred, y), r2_score_from_torch(y_true=y, y_pred=y_pred)
optimizer.zero_grad()
train_loss.backward() # each process synchronizes it's gradients in the backward pass
optimizer.step() # the right update is done since all procs have the right synced grads
scheduler.step()
log_train_val_stats(args, it, train_loss, train_acc, valid_for_test,
log_freq=2, ckpt_freq=10,
save_val_ckpt=True, log_to_tb=True, log_to_wandb=True)
return train_loss, train_acc
def debug_test():
args: Namespace = get_args()
args.num_its = 12
# - get mdl, opt, scheduler, etc
args.mdl = get_simple_model(in_features=5, hidden_features=20, out_features=1, num_layer=2)
wandb.watch(args.mdl)
args.optimizer = torch.optim.Adam(args.mdl.parameters(), lr=1e-1)
args.scheduler = torch.optim.lr_scheduler.ExponentialLR(args.optimizer, gamma=0.999, verbose=False)
# - train
train_loss, train_acc = train_for_test(args, args.mdl, args.optimizer, args.scheduler)
print(f'{train_loss=}, {train_loss=}')
# - eval
val_loss, val_acc = valid_for_test(args, args.mdl)
print(f'{val_loss=}, {val_acc=}')
# - make sure wandb closes properly
if args.log_to_wandb:
wandb.finish()
if __name__ == '__main__':
import os
# print(os.environ['WANDB_API_KEY'])
import time
start = time.time()
debug_test()
duration_secs = time.time() - start
print(f"\nSuccess, time passed: hours:{duration_secs / (60 ** 2)}, minutes={duration_secs / 60}, seconds={duration_secs}")
print('Done!\a')
code in github: https://github.com/brando90/ultimate-utils/blob/master/tutorials_for_myself/my_wandb/my_wandb_basic1.py
sample run: https://wandb.ai/brando/playground/runs/wpupxvg1
cross posted: https://community.wandb.ai/t/when-is-one-supposed-to-run-wandb-watch-so-that-weights-and-biases-tracks-params-and-gradients-prope/518
Cross posting an answer by charlesfrye in the wandb community forum:
There are two things you might be running into here -- can't confirm because your code relies on the ultimate-utils package.
wandb.watch will only start working once you call wandb.log after a backwards pass that touches the watched Module (docs).
The frequency with which gradients/params are logged is controlled by the log_freq argument. If the number of logging calls is less than the value of log_freq, then no information will be logged. Here's a short colab reproducing this behavior.
Also, if you want params and gradients, you need to set the log kwarg to "all". By default, we log only gradients.
I don't know why but this line of code seems to work:
wandb.watch(args.mdl, mse_loss, log="all", log_freq=10)
perhaps it really needs the loss and the log all despite it not being in the intro/quick start guide:
import wandb
# 1. Start a new run
wandb.init(project='playground', entity='brando')
# 2. Save model inputs and hyperparameters
config = wandb.config
config.learning_rate = 0.01
# 3. Log gradients and model parameters
wandb.watch(model)
for batch_idx, (data, target) in enumerate(train_loader):
...
if batch_idx % args.log_interval == 0:
# 4. Log metrics to visualize performance
wandb.log({"loss": loss})

How to implement LIME in a Bert model?

I am new to machine learning. I noticed that such questions have been asked before as well but did not receive a proper solution. Below is the code for semantic similarity and I want to implement LIME as a base. Please, help me out.
from sentence_transformers import SentenceTransformer, util
model = SentenceTransformer('paraphrase-distilroberta-base-v1')
# Two lists of sentences
sentences1 = ['The cat sits outside',
'A man is playing guitar',
'The new movie is awesome']
sentences2 = ['The cat sits outside',
'A woman watches TV',
'The new movie is so great']
#Compute embedding for both lists
embeddings1 = model.encode(sentences1, convert_to_tensor=True)
embeddings2 = model.encode(sentences2, convert_to_tensor=True)
#Compute cosine-similarits
cosine_scores = util.pytorch_cos_sim(embeddings1, embeddings2)
#Output the pairs with their score
for i in range(len(sentences1)):
print("{} \t\t {} \t\t Score: {:.4f}".format(sentences1[i], sentences2[i], cosine_scores[i][i]))
I don't know what Bert is, but try this sample code and see if it helps you.
import pandas as pd
import numpy as np
import sklearn
import sklearn.ensemble
import sklearn.metrics
from sklearn.utils import shuffle
from io import StringIO
import re
from nltk.corpus import stopwords
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import lime
from lime import lime_text
from lime.lime_text import LimeTextExplainer
from sklearn.pipeline import make_pipeline
df = pd.read_csv('C:\\Users\\ryans\\OneDrive\\Desktop\\Briefcase\\PDFs\\1-ALL PYTHON & R CODE SAMPLES\\A - GITHUB\\Natural Language Processing - Amazon Reviews\\Datafiniti_Amazon_Consumer_Reviews_of_Amazon_Products.csv')
# let's experiment with some sentiment analysis concepts
# first we need to clean up the stuff in the independent field of the DF we are workign with
df.replace('\'','', regex=True, inplace=True)
df['review_title'] = df[['reviews.title']].astype(str)
df['review_text'] = df[['reviews.text']].astype(str)
df['review_title'] = df['reviews.title'].str.replace('\d+', '')
df['review_text'] = df['reviews.text'].str.replace('\d+', '')
# get rid of special characters
df['review_title'] = df['reviews.title'].str.replace(r'[^\w\s]+', '')
df['review_text'] = df['reviews.text'].str.replace(r'[^\w\s]+', '')
# get rid of double spaces
df['review_title'] = df['reviews.title'].str.replace(r'\^[a-zA-Z]\s+', '')
df['review_text'] = df['reviews.text'].str.replace(r'\^[a-zA-Z]\s+', '')
# convert all case to lower
df['review_title'] = df['reviews.title'].str.lower()
df['review_text'] = df['reviews.text'].str.lower()
list_corpus = df["review_text"].tolist()
list_labels = df["reviews.rating"].tolist()
X_train, X_test, y_train, y_test = train_test_split(list_corpus, list_labels, test_size=0.2, random_state=40)
vectorizer = CountVectorizer(analyzer='word',token_pattern=r'\w{1,}', ngram_range=(1, 3), stop_words = 'english', binary=True)
train_vectors = vectorizer.fit_transform(X_train)
test_vectors = vectorizer.transform(X_test)
logreg = LogisticRegression(n_jobs=1, C=1e5)
logreg.fit(train_vectors, y_train)
pred = logreg.predict(test_vectors)
accuracy = accuracy_score(y_test, pred)
precision = precision_score(y_test, pred, average='weighted')
recall = recall_score(y_test, pred, average='weighted')
f1 = f1_score(y_test, pred, average='weighted')
print("accuracy = %.3f, precision = %.3f, recall = %.3f, f1 = %.3f" % (accuracy, precision, recall, f1))
list_corpus[3]
c = make_pipeline(vectorizer, logreg)
class_names=list(df.review_title.unique())
explainer = LimeTextExplainer(class_names=class_names)
idx = 3
exp = explainer.explain_instance(X_test[idx], c.predict_proba, num_features=6, labels=[1, 1])
print('Document id: %d' % idx)
print('Predicted class =', class_names[logreg.predict(test_vectors[idx]).reshape(1,-1)[0,0]])
print('True class: %s' % class_names[y_test[idx]])
print ('Explanation for class %s' % class_names[1])
print ('\n'.join(map(str, exp.as_list(label=1))))
exp = explainer.explain_instance(X_test[idx], c.predict_proba, num_features=6, top_labels=2)
print(exp.available_labels())
exp.show_in_notebook(text=False)
https://towardsdatascience.com/explain-nlp-models-with-lime-shap-5c5a9f84d59b
https://marcotcr.github.io/lime/tutorials/Lime%20-%20multiclass.html
https://towardsdatascience.com/understanding-model-predictions-with-lime-a582fdff3a3b

Perceptron algorithm is not working as I desired

I recently tried implementing perceptron algorithm but I was not getting the desired output.
Here is the code:
import numpy as np
import pandas as pd
with open("D:/data.txt",'r') as data: #importing the data
column = data.read()
split = np.array(column.split('\n'))
final =[]
for string in split:
final.append(string.split(','))
df = pd.DataFrame(final,columns=['x','y','response'])
df['x'] = df['x'].astype(float)
df['y'] = df['y'].astype(float)
df['response'] = df['response'].astype(int)
X = np.array(df[['x','y']])
y = np.array(df['response'])
def perceptron_algorithm(x,y,learning_rate=0.01,num_epoch=25):
np.random.seed(2)
x_min, x_max = min(x.T[0]), max(x.T[0])
y_min, y_max = min(x.T[1]), max(x.T[0])
w = np.array(np.random.rand(2,1))
b = np.random.rand(1)[0] + x_max
print(w,b)
for i in range(num_epoch):
w,b = perceptronstep(x,y,w,b,learning_rate)
print(w,b)
return w,b
def perceptronstep(x,y,w,b,learning_rate):
for i in range(len(x)):
y_hat = prediction(x[i],w,b)
if y_hat-y[i] == 1:
for j in range(len(w)):
w[j] += x[i][j]*learning_rate
b += learning_rate
elif y_hat-y[i] == -1:
for j in range(len(w)):
w[j] -= x[i][j]*learning_rate
b -= learning_rate
return w,b
def prediction(x,w,b):
return step(np.matmul(x,w)+b)
def step(t):
if t >=0:
return 1
else:
return 0
w,b = perceptron_algorithm(X,y)
This is the resulting line:
This is how the data looks:
Is there something wrong with my code ?
Here is the link to the data file:
https://drive.google.com/drive/folders/1TSug9tE6bljyBFv-u3mIGWW6F_3ZY2oa?usp=sharing
Edit: I have added the initial part of the code so it will be clear what I am trying to do.
Edit 2: I have added the data file and the "import pandas as pd" line of code

Unable to write gradient step in theano for rnn

I have following code in which I convert words to one hot vectors and do a gradient descent in theano using rnn for predicting next words given a sequence of words(basically a language model).
# coding: utf-8
# In[68]:
#Importing stuff
import theano
import theano.tensor as T
import numpy as np
# In[69]:
import nltk
import sys
import operator
import csv
import itertools
from utils import *
from datetime import datetime
# In[70]:
#Fixing vocabulary size for one hot vectors and some initialization stuff
v_size = 8000
unknown_token = "UNKNOWN_TOKEN"
start_token = "<s>"
end_token = "</s>"
# In[71]:
#Read data and start preprocessing
with open('reddit-comments-2015-08.csv','rb') as f:
reader = csv.reader(f, skipinitialspace=True)
reader.next()
sentences = list(itertools.chain(*[nltk.sent_tokenize(x[0].decode('utf-8')) for x in reader]))
print len(sentences)
# In[72]:
#Tokenize the sentences and add start and end tokens
tokenized_sentences = [nltk.word_tokenize(s) for s in sentences]
tokenized_sentences = [[start_token] + s + [end_token] for s in tokenized_sentences]
# In[73]:
#Get word frequencies and use only most frequent words in vocabulary
word_freq = nltk.FreqDist(itertools.chain(*tokenized_sentences))
vocab = word_freq.most_common(v_size-1)
# In[74]:
#Do mapping and reverse mapping
index_to_word = [x[0] for x in vocab]
index_to_word.append(unknown_token)
word_to_index = {w:i for i,w in enumerate(index_to_word)}
#Removing less frequent words
for i, s in enumerate(tokenized_sentences):
tokenized_sentences[i] = [w if w in word_to_index else unknown_token for w in s]
#Got vectors but they are not one hot
X_train = np.asarray([[word_to_index[w] for w in s[:-1]] for s in tokenized_sentences])
Y_train = np.asarray([[word_to_index[w] for w in s[1:]] for s in tokenized_sentences])
#Preprocessing ends here
# In[75]:
#Take only one sentence for now
X_train = X_train[0]
Y_train = Y_train[0]
# In[76]:
#Make input and output as onehot vectors. This can easily be replaced with vectors generated by word2vec.
X_train_onehot = np.eye(v_size)[X_train]
X = theano.shared(np.array(X_train_onehot).astype('float32'), name = 'X')
Y_train_onehot = np.eye(v_size)[Y_train]
Y = theano.shared(np.array(Y_train_onehot).astype('float32'), name = 'Y')
# In[77]:
#Initializing U, V and W
i_dim = v_size
h_dim = 100
o_dim = v_size
U = theano.shared(np.random.randn(i_dim, h_dim).astype('float32'), name = 'U')
W = theano.shared(np.random.randn(h_dim, h_dim).astype('float32'), name = 'W')
V = theano.shared(np.random.randn(h_dim, o_dim).astype('float32'), name = 'V')
# In[78]:
#forward propagation
s = T.vector('s')
results, updates = theano.scan(lambda x, sm1: T.tanh( T.dot(x, U) + T.dot(sm1, W)),
sequences = X_train_onehot,
outputs_info = s
)
y_hat = T.dot(results, V)
forward_propagation = theano.function(inputs=[s], outputs = y_hat)
# In[80]:
#loss
loss = T.sum(T.nnet.categorical_crossentropy(y_hat, Y))
# In[81]:
#Gradients
dw = T.grad(loss, W)
du = T.grad(loss, U)
dv = T.grad(loss, V)
# In[82]:
#BPTT
learning_rate = T.scalar('learning_rate')
gradient_step = theano.function(inputs = [s, learning_rate],
updates = (
(U, U - learning_rate * du),
(V, V - learning_rate * dv),
(W, W - learning_rate * dw)
)
)
# In[ ]:
But it keeps throwing error at gradient step. I am posting full code because I don't know which step is affecting the error. The following is the screenshot of error in jupyter notebook.
I solved it. The problem is with mismatch of types. I had to typecast du, dv, dw, learning rate to float32. By default, they are float64.

Resources