Is there a way to access tqdm's format_dict when it is run from another module? - tqdm

Is there a way to access tqdm's format_dict that's being run inside another module without editing the module itself?
import streamlit as st
from diffusers import StableDiffusionPipeline, LMSDiscreteScheduler
from PIL import Image
import sys

def generate(prompt, total_steps, type=0):
    lms = LMSDiscreteScheduler(
        beta_start=0.00085,
        beta_end=0.012,
        beta_schedule='scaled_linear'
    )
    # load text input to diffusion pipeline
    # `types` is a mapping of model names defined elsewhere in the app
    experimental_pipe = StableDiffusionPipeline.from_pretrained(
        types[type], scheduler=lms,
        use_auth_token=True, cache_dir=r"D:\ImageGenerator")
    experimental_pipe = experimental_pipe.to("cuda")
    images = experimental_pipe(prompt, num_inference_steps=total_steps)
    # StableDiffusionPipeline (experimental_pipe) creates an instance of the tqdm class
    return images.images
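One workaround, offered as a sketch rather than a confirmed solution: tqdm keeps a class-level WeakSet of live bars (tqdm._instances, an internal attribute, so this may break between versions). You can poll it from a background thread and read format_dict off whatever bar the pipeline created, without editing diffusers at all:

import threading
import time
from tqdm import tqdm

progress = {}  # latest stats per bar position, read by the Streamlit loop

def watch_tqdm(poll_interval=0.5):
    """Copy format_dict out of every live tqdm bar, best-effort."""
    while True:
        # _instances is unofficial API; guard in case it is absent.
        for bar in list(getattr(tqdm, "_instances", [])):
            progress[bar.pos] = dict(bar.format_dict)  # n, total, elapsed, rate...
        time.sleep(poll_interval)

threading.Thread(target=watch_tqdm, daemon=True).start()

format_dict exposes n, total, elapsed, and rate, so a fraction like stats['n'] / stats['total'] can drive st.progress.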

Related

Computing dask array chunks asynchronously (Dask + FastAPI)

I am building a FastAPI application that will serve chunks of a Dask Array. I would like to leverage FastAPI's asynchronous functionality alongside Dask-distributed's ability to operate asynchronously. Below is an MCVE (minimal, complete, verifiable example) that demonstrates what I'm trying to do on both the server and client sides of the application:
Server-side:
import time
import dask.array as da
import numpy as np
import uvicorn
from dask.distributed import Client
from fastapi import FastAPI

app = FastAPI()

# create a dask array that we can serve
data = da.from_array(np.arange(0, 1e6, dtype=np.int64), chunks=100)

async def _get_block(block_id):
    """return one block of the dask array as a list"""
    block_data = data.blocks[block_id].compute()
    return block_data.tolist()

@app.get("/")
async def get_root():
    time.sleep(1)
    return {"Hello": "World"}

@app.get("/{block_id}")
async def get_block(block_id: int):
    time.sleep(1)  # so we can test concurrency
    my_list = await _get_block(block_id)
    return {"block": my_list}

if __name__ == "__main__":
    client = Client(n_workers=2)
    print(client)
    print(client.cluster.dashboard_link)
    uvicorn.run(app, host="0.0.0.0", port=9000, log_level="debug")
Client-side:
import dask
import requests
from dask.distributed import Client

client = Client()
responses = [
    dask.delayed(requests.get, pure=False)(f"http://127.0.0.1:9000/{i}")
    for i in range(10)
]
dask.compute(responses)
In this setup, the compute() call in _get_block is "blocking" and only one chunk is computed at a time. I've tried various combinations of Client(asynchronous=True) and client.compute(dask.compute(responses)) without any improvement. Is it possible to await the computation of the dask array?
This line
block_data = data.blocks[block_id].compute()
is a blocking call. If you instead did client.compute(data.blocks[block_id]), you would get an awaitable future that can be used in conjunction with your IOLoop, so long as Dask is using the same loop.
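A minimal sketch of that variant (it reuses app and data from the question, and assumes the client is created on the same event loop uvicorn runs on, hence the startup hook):

from dask.distributed import Client

client = None

@app.on_event("startup")
async def startup():
    global client
    # asynchronous=True makes client.compute return awaitable futures
    client = await Client(asynchronous=True, n_workers=2)

async def _get_block(block_id):
    # awaiting the future yields to the event loop instead of blocking
    # the server the way .compute() does
    block_data = await client.compute(data.blocks[block_id])
    return block_data.tolist()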
Note that the Intake server would very much like to work in this manner (it too aspires to stream data by chunks for arrays and other data types).

How to let all worker do same task in dask?

I want to let all workers do the same task, like this:
from dask import distributed
from distributed import Client, LocalCluster
import dask
import socket

def writer(filename, data):
    with open(filename, 'w') as f:
        f.writelines(data)

def get_ip(x):
    return socket.gethostname()

# writer('/data/1.txt', a)
client = Client('192.168.123.1:8786')
A = client.submit(get_ip, 0, workers=['w1', 'w2'], pure=False)
print(client.ncores(),
      client.scheduler_info()
      # dask.config.get('distributed')
      )
A.result()
I have 2 workers, but it only prints one worker's hostname.
A simple way to achieve what you want is to use the Client.run method:
client.run(socket.gethostname)
This runs the function on all workers and returns all results. It does not use the normal task scheduling system, which is designed for a very different purpose from what you want.
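For example, with the scheduler address from the question (the worker addresses in the result are illustrative):

import socket
from dask.distributed import Client

client = Client('192.168.123.1:8786')

# client.run returns a dict with one entry per worker, keyed by address:
# {'tcp://192.168.123.2:41219': 'w1', 'tcp://192.168.123.3:35461': 'w2'}
print(client.run(socket.gethostname))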

Retrained inception_v3 model deployed in Cloud ML Engine always outputs the same predictions

I followed the codelab TensorFlow For Poets for transfer learning using inception_v3. It generates retrained_graph.pb and retrained_labels.txt files, which can be used to make predictions locally (running label_image.py).
Then, I wanted to deploy this model to Cloud ML Engine, so that I could make online predictions. For that, I had to export the retrained_graph.pb to SavedModel format. I managed to do it by following the indications in this answer from Google's @rhaertel80 and this Python file from the Flowers Cloud ML Engine Tutorial. Here is my code:
import tensorflow as tf
from tensorflow.contrib import layers
from tensorflow.python.saved_model import builder as saved_model_builder
from tensorflow.python.saved_model import signature_constants
from tensorflow.python.saved_model import signature_def_utils
from tensorflow.python.saved_model import tag_constants
from tensorflow.python.saved_model import utils as saved_model_utils

export_dir = '../tf_files/saved7'
retrained_graph = '../tf_files/retrained_graph2.pb'
label_count = 5

def build_signature(inputs, outputs):
    signature_inputs = {key: saved_model_utils.build_tensor_info(tensor)
                        for key, tensor in inputs.items()}
    signature_outputs = {key: saved_model_utils.build_tensor_info(tensor)
                         for key, tensor in outputs.items()}
    signature_def = signature_def_utils.build_signature_def(
        signature_inputs,
        signature_outputs,
        signature_constants.PREDICT_METHOD_NAME
    )
    return signature_def

class GraphReferences(object):
    def __init__(self):
        self.examples = None
        self.train = None
        self.global_step = None
        self.metric_updates = []
        self.metric_values = []
        self.keys = None
        self.predictions = []
        self.input_jpeg = None

class Model(object):
    def __init__(self, label_count):
        self.label_count = label_count

    def build_image_str_tensor(self):
        image_str_tensor = tf.placeholder(tf.string, shape=[None])

        def decode_and_resize(image_str_tensor):
            return image_str_tensor

        image = tf.map_fn(
            decode_and_resize,
            image_str_tensor,
            back_prop=False,
            dtype=tf.string
        )
        return image_str_tensor

    def build_prediction_graph(self, g):
        tensors = GraphReferences()
        tensors.examples = tf.placeholder(tf.string, name='input', shape=(None,))
        tensors.input_jpeg = self.build_image_str_tensor()
        keys_placeholder = tf.placeholder(tf.string, shape=[None])
        inputs = {
            'key': keys_placeholder,
            'image_bytes': tensors.input_jpeg
        }
        keys = tf.identity(keys_placeholder)
        outputs = {
            'key': keys,
            'prediction': g.get_tensor_by_name('final_result:0')
        }
        return inputs, outputs

    def export(self, output_dir):
        with tf.Session(graph=tf.Graph()) as sess:
            with tf.gfile.GFile(retrained_graph, "rb") as f:
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(f.read())
                tf.import_graph_def(graph_def, name="")
            g = tf.get_default_graph()
            inputs, outputs = self.build_prediction_graph(g)
            signature_def = build_signature(inputs=inputs, outputs=outputs)
            signature_def_map = {
                signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature_def
            }
            builder = saved_model_builder.SavedModelBuilder(output_dir)
            builder.add_meta_graph_and_variables(
                sess,
                tags=[tag_constants.SERVING],
                signature_def_map=signature_def_map
            )
            builder.save()

model = Model(label_count)
model.export(export_dir)
This code generates a saved_model.pb file, which I then used to create the Cloud ML Engine model. I can get predictions from this model using gcloud ml-engine predict --model my_model_name --json-instances request.json, where the contents of request.json are:
{ "key": "0", "image_bytes": { "b64": "jpeg_image_base64_encoded" } }
However, no matter which jpeg I encode in the request, I always get the exact same wrong predictions:
(screenshot of the identical prediction output omitted)
I guess the problem is in the way the Cloud ML Prediction API passes the base64-encoded image bytes to the input tensor "DecodeJpeg/contents:0" of inception_v3 (the "build_image_str_tensor()" method in the previous code). Any clue as to how I can solve this issue and have my locally retrained model serve correct predictions on Cloud ML Engine?
(Just to make it clear, the problem is not in retrained_graph.pb, as it makes correct predictions when I run it locally; nor is it in request.json, because the same request file worked without problems when following the Flowers Cloud ML Engine Tutorial referenced above.)
First, a general warning. The TensorFlow for Poets codelab was not written in a way that is very amenable to production serving (partly manifested by the workarounds you are having to implement). You would normally export a prediction-specific graph that doesn't contain all of the extra training ops. So while we can try and hack something together that works, extra work may be needed to productionize this graph.
The approach of your code appears to be to import one graph, add some placeholders, and then export the result. This is generally fine. However, in the code shown in the question, you are adding input placeholders without actually connecting them to anything in the imported graph. You end up with a graph containing multiple disconnected subgraphs, something like (excuse the crude diagram):
image_str_tensor [input=image_bytes] -> <nothing>
keys_placeholder [input=key] -> identity [output=key]
inception_subgraph -> final_graph [output=prediction]
By inception_subgraph I mean all of the ops that you are importing.
So image_bytes is effectively a no-op and is ignored; key gets passed through; and prediction contains the result of running the inception_subgraph; since it's not using the input you are passing, it's returning the same result every time (though I admit I actually expected an error here).
To address this problem, we would need to connect the placeholder you've created to the one that already exists in inception_subgraph to create a graph more or less like this:
image_str_tensor [input=image_bytes] -> inception_subgraph -> final_graph [output=prediction]
keys_placeholder [input=key] -> identity [output=key]
Note that image_str_tensor is going to be a batch of images, as required by the prediction service, but the inception graph's input is actually a single image. In the interest of simplicity, we're going to address this in a hacky way: we'll assume we'll be sending images one-by-one. If we ever send more than one image per request, we'll get errors. Also, batch prediction will never work.
The main change you need is the import statement, which connects the placeholder we've added to the existing input in the graph (you'll also see the code for changing the shape of the input):
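tf.import_graph_def(graph_def, input_map={'DecodeJPGInput:0': coerced}, name="")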
Putting it all together, we get something like:
import tensorflow as tf
from tensorflow.contrib import layers
from tensorflow.python.saved_model import builder as saved_model_builder
from tensorflow.python.saved_model import signature_constants
from tensorflow.python.saved_model import signature_def_utils
from tensorflow.python.saved_model import tag_constants
from tensorflow.python.saved_model import utils as saved_model_utils

export_dir = '../tf_files/saved7'
retrained_graph = '../tf_files/retrained_graph2.pb'
label_count = 5

class Model(object):
    def __init__(self, label_count):
        self.label_count = label_count

    def export(self, output_dir):
        with tf.Session(graph=tf.Graph()) as sess:
            # This will be our input that accepts a batch of inputs
            image_bytes = tf.placeholder(tf.string, name='input', shape=(None,))
            # Force it to be a single input; will raise an error if we send a batch.
            coerced = tf.squeeze(image_bytes)
            # When we import the graph, we'll connect `coerced` to `DecodeJPGInput:0`
            input_map = {'DecodeJPGInput:0': coerced}

            with tf.gfile.GFile(retrained_graph, "rb") as f:
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(f.read())
                tf.import_graph_def(graph_def, input_map=input_map, name="")

            keys_placeholder = tf.placeholder(tf.string, shape=[None])
            inputs = {'image_bytes': image_bytes, 'key': keys_placeholder}

            keys = tf.identity(keys_placeholder)
            outputs = {
                'key': keys,
                'prediction': tf.get_default_graph().get_tensor_by_name('final_result:0')
            }

            tf.saved_model.simple_save(sess, output_dir, inputs, outputs)

model = Model(label_count)
model.export(export_dir)
I believe that your error is quite simple to solve:
{ "key": "0", "image_bytes": { "b64": "jpeg_image_base64_encoded" } }
You used quotes to specify what I believe is a string. By doing that, your program reads the literal text jpeg_image_base64_encoded instead of the actual value of the variable.
That's why you always get the same prediction.
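For reference, a minimal sketch of generating such a request.json with a real base64 payload (the file names are illustrative):

import base64
import json

# base64-encode the image bytes so they survive the JSON round trip
with open('test.jpg', 'rb') as f:
    encoded = base64.b64encode(f.read()).decode('utf-8')

with open('request.json', 'w') as f:
    json.dump({'key': '0', 'image_bytes': {'b64': encoded}}, f)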
For anyone working on deploying TensorFlow image-based models on Google Cloud ML, in particular trying to get the base64 encoding working for images (as discussed in this question), I'd recommend also having a look at the following repo that I put together. I spent a lot of time working through the deployment process and was only able to find partial information across the web and on Stack Overflow. This repo has a full working version of deploying a TensorFlow tf.keras model onto Google Cloud ML, and I think it will be of help to people who are facing the same challenges I faced. Here's the GitHub link:
https://github.com/mhwilder/tf-keras-gcloud-deployment
The repo covers the following topics:
Training a fully convolutional tf.keras model locally (mostly just to have a model for testing the next parts)
Example code for exporting models that work with the Cloud ML Engine
Three model versions that accept different JSON input types (1. An image converted to a simple list string, 2. An image converted to a base64 encoded string, and 3. A URL that points to an image in a Google Storage bucket)
Instructions and references for general Google Cloud Platform setup
Code for preparing the input JSON files for the 3 different input types
Google Cloud ML model and version creation instructions from the console
Examples using the Google Cloud SDK to call predict on the models

Is there any fast and efficient way to get abstracts from pubmed?

I would like to download large scientific abstract data for, let's say, about 2000 PubMed IDs. My Python code is sloppy and seems to run rather slowly. Is there any fast and efficient method to harvest these abstracts?
If this is the fastest method, how do I measure it so that I can compare it against others, or against a home versus work situation (a different ISP may play a part in speed)?
Attached my code below.
import sqlite3
from Bio.Entrez import read, efetch, email, tool
from metapub import PubMedFetcher
import pandas as pd
import requests
from datetime import date
import xml.etree.ElementTree as ET
import time
import sys

reload(sys)
sys.setdefaultencoding('utf8')

Abstract_data = pd.DataFrame(columns=["name", "pmid", "abstract"])

def abstract_download(self, dict_pmids):
    """
    This method returns abstract for a given pmid and add to the abstract data
    """
    index = 0
    baseUrl = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
    for names in dict_pmids:
        for pmid in dict_pmids[names]:
            try:
                abstract = []
                url = baseUrl+"efetch.fcgi?db=pubmed&id="+pmid+"&rettype=xml"+
                response = requests.request("GET", url, timeout=500).text
                response = response.encode('utf-8')
                root = ET.fromstring(response)
                root_find = root.findall('./PubmedArticle/MedlineCitation/Article/Abstract/')
                if len(root_find) == 0:
                    root_find = root.findall('./PubmedArticle/MedlineCitation/Article/ArticleTitle')
                for i in range(len(root_find)):
                    if root_find[i].text != None:
                        abstract.append(root_find[i].text)
                if abstract is not None:
                    Abstract_data.loc[index] = names, pmid, "".join(abstract)
                    index += 1
            except:
                print "Connection Refused"
                time.sleep(5)
                continue
    return Abstract_data
EDIT: The general error that this script produced was seemingly a "Connection Refused". See ZF007's answer below for how this was solved.
The code below works. Your script hung on malformed URL construction. Also, if anything went wrong inside the script, the reported error was a refused connection. That was in fact not the case; it was the code that processed the retrieved data. I've made some adjustments to get the code working for me and left comments in place where you need to adjust things due to the lack of the dict_pmids list.
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys, time, requests, sqlite3
import pandas as pd
import xml.etree.ElementTree as ET
from metapub import PubMedFetcher
from datetime import date
from Bio.Entrez import read, efetch, email, tool

def abstract_download(pmids):
    """
    This method returns the abstract for a given pmid and adds it to the abstract data.
    """
    index = 0
    baseUrl = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
    collected_abstract = []

    # code below disabled to get general abstract extraction from pubmed working.
    # I don't have the dict_pmids list.
    """
    for names in dict_pmids:
        for pmid in dict_pmids[names]:

    move the working code below to the right to get it in place with the above two
    requirements prior to providing the dict_pmids list.

    # from here the code works up to the next comment. I don't have the dict_pmids list.
    """

    for pmid in pmids:
        print 'pmid : %s\n' % pmid
        abstract = []
        root = ''
        try:
            url = '%sefetch.fcgi?db=pubmed&id=%s&rettype=xml' % (baseUrl, pmid)
            # check my url... a line to paste into a webbrowser like firefox.
            print 'url', url
            response = requests.request("GET", url, timeout=500).text
            # check if I got a response.
            print 'response', response
            # response = response.encode('utf-8')
            root = ET.fromstring(response)
        except Exception as inst:
            # besides a refused connection... the "why" it failed comes in handy
            # to resolve issues at hand if and when they happen.
            print "Connection Refused", inst
            time.sleep(5)
            continue
        root_find = root.findall('./PubmedArticle/MedlineCitation/Article/Abstract/')
        if len(root_find) == 0:
            root_find = root.findall('./PubmedArticle/MedlineCitation/Article/ArticleTitle')
        # check if I found something
        print 'root_find : %s\n\n' % root_find
        for i in range(len(root_find)):
            if root_find[i].text != None:
                abstract.append(root_find[i].text)
        Abstract_data = pd.DataFrame(columns=["name", "pmid", "abstract"])
        # check if I found something
        # print 'abstract : %s\n' % abstract
        # code works up to the 'abstract' print statement; the rest is disabled
        # because I don't have the dict_pmids list.
        if abstract is not None:
            # Abstract_data.loc[index] = names, pmid, "".join(abstract)
            index += 1
            collected_abstract.append(abstract)
    # change back to `return Abstract_data` when the dict_pmids list is administered.
    # return Abstract_data
    return collected_abstract

if __name__ == '__main__':
    sys.stdout.flush()
    reload(sys)
    sys.setdefaultencoding('utf8')
    pubmedIDs = range(21491000, 21491001)
    mydata = abstract_download(pubmedIDs)
    print 'mydata : %s' % (mydata)
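On the speed question: most of the time above goes into issuing one HTTP request per PMID. efetch accepts a comma-separated list of IDs, so 2000 abstracts can be fetched in a handful of requests. Below is a sketch of that batched approach (Python 3; the chunk size and sleep are assumptions chosen to stay under NCBI's roughly 3-requests-per-second guideline for clients without an API key):

import time
import requests
import xml.etree.ElementTree as ET

BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"

def fetch_abstracts(pmids, chunk_size=200):
    abstracts = {}
    for start in range(0, len(pmids), chunk_size):
        chunk = pmids[start:start + chunk_size]
        params = {"db": "pubmed",
                  "id": ",".join(map(str, chunk)),
                  "rettype": "xml"}
        root = ET.fromstring(requests.get(BASE, params=params, timeout=60).text)
        # one PubmedArticle element per requested PMID
        for article in root.findall("./PubmedArticle"):
            pmid = article.findtext("./MedlineCitation/PMID")
            texts = [el.text or "" for el in article.findall(
                "./MedlineCitation/Article/Abstract/AbstractText")]
            abstracts[pmid] = " ".join(texts)
        time.sleep(0.4)  # be polite to the rate limit
    return abstracts

Wrapping the call with time.perf_counter() before and after gives a wall-clock figure you can compare across machines or ISPs.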

Groovy Script to add new phase job to multi-job in Jenkins

For an already existing multi-job in Jenkins, I need to add new phase jobs using a Groovy script. I have written the following Groovy code, which adds an already existing job, p25_deploy-1.
The code creates the multi-job, but the phase job does not show as mapped in the Jenkins UI. However, if I look at config.xml, it is created properly as expected, except for a <killPhaseOnJobResultCondition> tag. Why is the phase job not mapped properly?
import jenkins.model.*
import hudson.model.*
import com.tikal.jenkins.plugins.multijob.*
import com.tikal.jenkins.plugins.multijob.PhaseJobsConfig.*
import com.tikal.jenkins.plugins.multijob.PhaseJobsConfig.KillPhaseOnJobResultCondition.*
import java.lang.String.*
import hudson.model.Descriptor;
import hudson.tasks.Builder;

def jenkinsInstance = jenkins.model.Jenkins.instance
def templateJobName = 'profile_p25'
def templateJob = jenkinsInstance.getJob(templateJobName)

// get MultiJob BuildPhases and clone each PhaseJob
builders = templateJob.getBuilders();
builders.each { b ->
    if (b instanceof MultiJobBuilder) {
        def pj = b.getPhaseJobs()
        hudson.model.Describable p1 = new PhaseJobsConfig("p25_deploy-1", null,
            true, PhaseJobsConfig.KillPhaseOnJobResultCondition.NEVER,
            null, false, false, null, 0, false, true, null, false, false)
        pj.add(p1)
    }
}
templateJob.save()

// update dependencies
jenkinsInstance.rebuildDependencyGraph()
Any help will be really appreciated. I have tried many ways but was not able to figure out the problem with the script.
We could use the Job DSL to create it, but I wanted this done in a Groovy script, and moreover to modify the existing job.
Yay! I am back with the answer to my question. I had been trying this for a very long time and was finally able to make it work. I knew the solution would be simple, but I could not figure out the trick.
import jenkins.model.*
import hudson.model.*
import com.tikal.jenkins.plugins.multijob.*
import com.tikal.jenkins.plugins.multijob.PhaseJobsConfig.*
import com.tikal.jenkins.plugins.multijob.PhaseJobsConfig.KillPhaseOnJobResultCondition.*
import java.lang.String.*
import hudson.model.Descriptor
import hudson.tasks.Builder

def jenkinsInstance = jenkins.model.Jenkins.instance
def templateJobName = 'profile_p25'
def templateJob = jenkinsInstance.getJob(templateJobName)

// get MultiJob BuildPhases and clone each PhaseJob
builders = templateJob.getBuilders();
builders.each { b ->
    if (b instanceof MultiJobBuilder) {
        def pj = b.getPhaseJobs()
        // deploys[i] is the job name to add, taken from the surrounding script
        hudson.model.Describable newphase = new PhaseJobsConfig(deploys[i], null,
            true, null, null, false, false, null, 0, false, false, "", false, false)
        // the trick: set the condition as a property after construction,
        // instead of passing the enum to the constructor
        newphase.killPhaseOnJobResultCondition = 'NEVER'
        pj.add(newphase)
    }
}
templateJob.save()
