Is there any fast and efficient way to get abstracts from pubmed? - biopython

I would like to download abstract data for, let's say, about 2000 PubMed IDs. My Python code is sloppy and seems rather slow. Is there any fast and efficient method to harvest these abstracts?
If this is the fastest method, how do I measure it so that I can compare it against other approaches, or against a home versus work situation (a different ISP may play a part in the speed)?
My code is attached below.
import sqlite3
from Bio.Entrez import read,efetch,email,tool
from metapub import PubMedFetcher
import pandas as pd
import requests
from datetime import date
import xml.etree.ElementTree as ET
import time
import sys

reload(sys)
sys.setdefaultencoding('utf8')

Abstract_data = pd.DataFrame(columns=["name","pmid","abstract"])

def abstract_download(self,dict_pmids):
    """
    This method returns abstract for a given pmid and add to the abstract data
    """
    index=0
    baseUrl = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
    for names in dict_pmids:
        for pmid in dict_pmids[names]:
            try:
                abstract = []
                url = baseUrl+"efetch.fcgi?db=pubmed&id="+pmid+"&rettype=xml"+
                response=requests.request("GET",url,timeout=500).text
                response=response.encode('utf-8')
                root=ET.fromstring(response)
                root_find=root.findall('./PubmedArticle/MedlineCitation/Article/Abstract/')
                if len(root_find)==0:
                    root_find=root.findall('./PubmedArticle/MedlineCitation/Article/ArticleTitle')
                for i in range(len(root_find)):
                    if root_find[i].text != None:
                        abstract.append(root_find[i].text)
                if abstract is not None:
                    Abstract_data.loc[index]=names,pmid,"".join(abstract)
                    index+=1
            except:
                print "Connection Refused"
                time.sleep(5)
                continue
    return Abstract_data
EDIT: The general error this script runs into is seemingly a "Connection Refused". See ZF007's answer below for how this was solved.

The code below works. Your script hung on malformed URL construction. Also, whenever anything went wrong inside the script, it reported a refused connection. That was in fact not the case, because it was the code processing the retrieved data that failed. I've made some adjustments to get the code working for me and left comments in place where you need to adjust things, because I don't have your dict_pmids list.
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys, time, requests, sqlite3
import pandas as pd
import xml.etree.ElementTree as ET
from metapub import PubMedFetcher
from datetime import date
from Bio.Entrez import read,efetch,email,tool

def abstract_download(pmids):
    """
    This method returns abstract for a given pmid and add to the abstract data
    """
    index = 0
    baseUrl = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
    collected_abstract = []

    # code below disabled to get general abstract extraction from pubmed working. I don't have the dict_pmid list.
    """
    for names in dict_pmids:
        for pmid in dict_pmids[names]:

    move below working code to the right to get it in place with above two requirements prior to providing dict_pmid list.

    # from here code works up to the next comment. I don't have the dict_pmid list.
    """

    for pmid in pmids:
        print 'pmid : %s\n' % pmid

        abstract = []
        root = ''

        try:
            url = '%sefetch.fcgi?db=pubmed&id=%s&rettype=xml' % (baseUrl, pmid)
            # check my url... line to paste into a webbrowser like firefox.
            print 'url', url

            response = requests.request("GET", url, timeout=500).text
            # check if I got a response.
            print 'response', response

            # response = response.encode('utf-8')
            root = ET.fromstring(response)

        except Exception as inst:
            # besides "Connection Refused"... the "why" behind the failure comes in handy to resolve issues at hand
            # if and when they happen.
            print "Connection Refused", inst
            time.sleep(5)
            continue

        root_find = root.findall('./PubmedArticle/MedlineCitation/Article/Abstract/')

        if len(root_find)==0:
            root_find = root.findall('./PubmedArticle/MedlineCitation/Article/ArticleTitle')

        # check if I found something
        print 'root_find : %s\n\n' % root_find

        for i in range(len(root_find)):
            if root_find[i].text != None:
                abstract.append(root_find[i].text)

        Abstract_data = pd.DataFrame(columns=["name","pmid","abstract"])

        # check if I found something
        #print 'abstract : %s\n' % abstract

        # code works up to the print statement 'abstract', abstract; the rest is disabled because I don't have the dict_pmid list.
        if abstract is not None:
            # Abstract_data.loc[index] = names,pmid,"".join(abstract)
            index += 1
            collected_abstract.append(abstract)

    # change back to return Abstract_data when the dict_pmid list is administered.
    # return Abstract_data
    return collected_abstract

if __name__ == '__main__':
    sys.stdout.flush()

    reload(sys)
    sys.setdefaultencoding('utf8')

    pubmedIDs = range(21491000, 21491001)

    mydata = abstract_download(pubmedIDs)

    print 'mydata : %s' % (mydata)
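As for the timing part of the question: a minimal sketch for comparing runs (home versus work, or one approach against another) is to wrap the call in time.time(), which works in both Python 2 and 3; the ID range below is only an example:

import time

pubmedIDs = range(21491000, 21491010)          # example IDs only

start = time.time()
abstracts = abstract_download(pubmedIDs)
elapsed = time.time() - start

print("Fetched %d abstracts in %.1f s (%.2f s per ID)"
      % (len(abstracts), elapsed, elapsed / len(pubmedIDs)))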

Related

Setting timezone in AsyncIOScheduler

I'm in the Pacific timezone and I'm creating a discord bot to send a message at 8am in CENTRAL time.
import os
import discord
from discord.ext import commands
from dotenv import load_dotenv
from rich import print
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger

load_dotenv()
TOKEN = os.getenv('DISCORD_TOKEN')

intents = discord.Intents.default()
intents.members = True

bot = commands.Bot(command_prefix = '!', intents=intents)

# Will become the good morning message
async def gm():
    c = bot.get_channel(channel_id_removed)
    await c.send("This will be the good morning message.")

@bot.event
async def on_ready():
    for guild in bot.guilds:
        print(
            f'{bot.user} is connected to the following guild:\n'
            f'\t{guild.name} (id: {guild.id})'
        )

# initializing scheduler for time of day sending
scheduler = AsyncIOScheduler()

# Attempts to set the timezone
# scheduler = AsyncIOScheduler(timezone='America/Chicago')
# scheduler = AsyncIOScheduler({'apscheduler.timezone': 'America/Chicago'})
# scheduler.configure(timezone='America/Chicago')

# Set the time for sending
scheduler.add_job(gm, CronTrigger(hour="6", minute="0", second="0"))

# starting the scheduler
scheduler.start()

@bot.event
async def on_member_join(member):
    general_channel = None
    guild_joined = member.guild
    print(guild_joined)

    general_channel = discord.utils.get(guild_joined.channels, name='general')
    print(f'General Channel ID: {general_channel}')

    if general_channel:
        embed=discord.Embed(title="Welcome!",description=f"Welcome to The Dungeon {member.mention}!!")
        await general_channel.send(embed=embed)

bot.run(TOKEN)
Environment:
Windows 10
Python 3.10.4
APScheduler 3.9.1
pytz 2022.1
pytz-deprecation-shim 0.1.0.post0
tzdata 2022.1
tzlocal 4.2
I'm just wondering if I'm doing something wrong? Or if what I'm trying to do simply isn't supported? It works if I use my local time so I know the function is ok.
You are using the asyncio scheduler but you're not running an asyncio event loop, so there is no way this could work. Copy/paste from the provided example:
from datetime import datetime
import asyncio
import os

from apscheduler.schedulers.asyncio import AsyncIOScheduler


def tick():
    print('Tick! The time is: %s' % datetime.now())


if __name__ == '__main__':
    scheduler = AsyncIOScheduler()
    scheduler.add_job(tick, 'interval', seconds=3)
    scheduler.start()
    print('Press Ctrl+{0} to exit'.format('Break' if os.name == 'nt' else 'C'))

    # Execution will block here until Ctrl+C (Ctrl+Break on Windows) is pressed.
    try:
        asyncio.get_event_loop().run_forever()
    except (KeyboardInterrupt, SystemExit):
        pass
The reason it is not working is because, while scheduler.start() instantiates an event loop as a side effect, it expects the loop to be run elsewhere so that the scheduler can do its work.
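In the discord.py case specifically, the bot itself runs the asyncio event loop once bot.run() is called, so a minimal sketch (assuming APScheduler 3.x and a recent discord.py; the channel ID and token handling are placeholders) is to create and start the scheduler from a handler that runs inside that loop and to pass the timezone explicitly:

import os

import discord
from discord.ext import commands
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger

bot = commands.Bot(command_prefix='!', intents=discord.Intents.default())


async def gm():
    channel = bot.get_channel(123456789)   # placeholder channel ID
    await channel.send("This will be the good morning message.")


@bot.event
async def on_ready():
    # The bot's event loop is running here, so AsyncIOScheduler can attach to it.
    scheduler = AsyncIOScheduler(timezone='America/Chicago')
    scheduler.add_job(gm, CronTrigger(hour=8, minute=0, timezone='America/Chicago'))
    scheduler.start()


bot.run(os.getenv('DISCORD_TOKEN'))

Starting the scheduler inside on_ready means it attaches to the loop discord.py is already running; guarding against on_ready firing more than once is left out for brevity.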

Access stdout of Conducto Exec node

Is there an example I can see of accessing the stdout of an Exec node? I am trying to parse stdout similarly to Jenkins groovy dsl:
tobeparsed = sh(returnStdout: true, script: "myscript.sh")
That feature is not available in Conducto. When you want to send data from one node to another, there are a few ways to do so.
In these examples let's try to generate a random number in the first node, and have the second echo it.
A simple option is to use co.data to save the data in the first node, then get it from the second node.
import conducto as co
import random

img = co.Image(copy_dir=".")

def first():
    # Generate a random number and save it to co.data,
    # scoped to the current pipeline.
    number = random.random()
    co.data.pipeline.puts("random_number", str(number).encode())
    print(f"Saved random number to co.data. Value: {number}")

def second():
    # Read the value from co.data
    stored = co.data.pipeline.gets("random_number")
    number = float(stored.decode())
    print(f"Read random number from co.data. Value: {number}")

def main() -> co.Serial:
    with co.Serial(image=img) as output:
        output["first"] = co.Exec("python pipeline.py first")
        output["second"] = co.Exec("python pipeline.py second")
    return output

if __name__ == '__main__':
    co.main(default=main)
A more sophisticated way: the first node could be called from a co.Lazy, and generate the second node once it knows the value.
import conducto as co
import random

img = co.Image(copy_dir=".")

def first() -> co.Serial:
    output = co.Serial()
    # Generate a random number and encode it into the command
    # of the second node.
    output["second"] = co.Exec(f"echo {random.random()}")
    return output

def main() -> co.Serial:
    return co.Lazy("python pipeline.py first", image=img)

if __name__ == '__main__':
    co.main(default=main)
In either case, save that text to a file called pipeline.py. Run it with python pipeline.py to see the tree it would generate. Run python pipeline.py --local to run it on your machine.
There's nothing magical about the name pipeline.py, just make sure it matches the commands in the co.Exec nodes.

Using DASK to read files and write to NEO4J in PYTHON

I am having trouble parallelizing code that reads some files and writes to neo4j.
I am using dask to parallelize the process_language_files function (3rd cell from the bottom).
I try to explain the code below, listing out the functions (First 3 cells).
The errors are printed at the end (Last 2 cells).
I am also listing environments and package versions at the end.
If I remove dask.delayed and run this code sequentially, it works perfectly well.
Thank you for your help. :)
==========================================================================
Some functions to work with neo4j.
from neo4j import GraphDatabase
from tqdm import tqdm

def get_driver(uri_scheme='bolt', host='localhost', port='7687', username='neo4j', password=''):
    """Get a neo4j driver."""
    connection_uri = "{uri_scheme}://{host}:{port}".format(uri_scheme=uri_scheme, host=host, port=port)
    auth = (username, password)
    driver = GraphDatabase.driver(connection_uri, auth=auth)
    return driver

def format_raw_res(raw_res):
    """Parse neo4j results"""
    res = []
    for r in raw_res:
        res.append(r)
    return res

def run_bulk_query(query_list, driver):
    """Run a list of neo4j queries in a session."""
    results = []
    with driver.session() as session:
        for query in tqdm(query_list):
            raw_res = session.run(query)
            res = format_raw_res(raw_res)
            results.append({'query':query, 'result':res})
    return results

global_driver = get_driver(uri_scheme='bolt', host='localhost', port='8687', username='neo4j', password='abc123')  # neo4j driver object.
This is how we create a dask client to parallelize.
from dask.distributed import Client
client = Client(threads_per_worker=4, n_workers=1)
The functions that the main code is calling.
import sys
import time
import json
import pandas as pd
import dask

def add_nodes(nodes_list, language_code):
    """Returns a list of strings. Each string is a cypher query to add a node to neo4j."""
    list_of_create_strings = []
    create_string_template = """CREATE (:LABEL {{node_id:{node_id}}})"""
    for index, node in nodes_list.iterrows():
        create_string = create_string_template.format(node_id=node['new_id'])
        list_of_create_strings.append(create_string)
    return list_of_create_strings

def add_relations(relations_list, language_code):
    """Returns a list of strings. Each string is a cypher query to add a relationship to neo4j."""
    list_of_create_strings = []
    create_string_template = """
        MATCH (a),(b) WHERE a.node_id = {source} AND b.node_id = {target}
        MERGE (a)-[r:KNOWS {{ relationship_id:{edge_id} }}]-(b)"""
    for index, relations in relations_list.iterrows():
        create_string = create_string_template.format(
            source=relations['from'], target=relations['to'],
            edge_id=''+str(relations['from'])+'-'+str(relations['to']))
        list_of_create_strings.append(create_string)
    return list_of_create_strings

def add_data(language_code, edges, features, targets, driver):
    """Add nodes and relationships to neo4j"""
    add_nodes_cypher = add_nodes(targets, language_code)               # Returns a list of strings. Each string is a cypher query to add a node to neo4j.
    node_results = run_bulk_query(add_nodes_cypher, driver)            # Runs each string in the above list in a neo4j session.
    add_relations_cypher = add_relations(edges, language_code)         # Returns a list of strings. Each string is a cypher query to add a relationship to neo4j.
    relations_results = run_bulk_query(add_relations_cypher, driver)   # Runs each string in the above list in a neo4j session.

    # Saving some metadata
    results = {
        "nodes": {"results": node_results, "length": len(add_nodes_cypher),},
        "relations": {"results": relations_results, "length": len(add_relations_cypher),},
    }
    return results

def load_data(language_code):
    """Load data from files"""
    # Saving file names to variables
    edges_filename = './edges.csv'
    features_filename = './features.json'
    target_filename = './target.csv'

    # Loading data from the file names
    edges = helper.read_csv(edges_filename)
    features = helper.read_json(features_filename)
    targets = helper.read_csv(target_filename)

    # Saving some metadata
    results = {
        "edges": {"length": len(edges),},
        "features": {"length": len(features),},
        "targets": {"length": len(targets),},
    }
    return edges, features, targets, results
The main code.
def process_language_files(process_language_files, driver):
    """Reads files, creates cypher queries to add nodes and relationships, runs cypher query in a neo4j session."""
    edges, features, targets, reading_results = load_data(language_code)             # Read files.
    writing_results = add_data(language_code, edges, features, targets, driver)      # Convert files to nodes and relationships and add to neo4j in a neo4j session.
    return {"reading_results": reading_results, "writing_results": writing_results}  # Return some metadata

# Execution starts here
res = []
for index, language_code in enumerate(['ENGLISH', 'FRENCH']):
    lazy_result = dask.delayed(process_language_files)(language_code, global_driver)
    res.append(lazy_result)
Result from res. These are dask delayed objects.
print(*res)
Delayed('process_language_files-a73f4a9d-6ffa-4295-8803-7fe09849c068') Delayed('process_language_files-c88fbd4f-e8c1-40c0-b143-eda41a209862')
The errors. Even if I use dask.compute(), I get similar errors.
futures = dask.persist(*res)
AttributeError                            Traceback (most recent call last)
~/Code/miniconda3/envs/MVDS/lib/python3.6/site-packages/distributed/protocol/pickle.py in dumps(x, buffer_callback, protocol)
     48                 buffers.clear()
---> 49             result = pickle.dumps(x, **dump_kwargs)
     50         if len(result) < 1000:

AttributeError: Can't pickle local object 'BoltPool.open.<locals>.opener'
==========================================================================
# Name                    Version      Build           Channel
dask                      2020.12.0    pyhd8ed1ab_0    conda-forge
jupyterlab                3.0.3        pyhd8ed1ab_0    conda-forge
neo4j-python-driver       4.2.1        pyh7fcb38b_0    conda-forge
python                    3.9.1        hdb3f193_2
You are getting this error because you are trying to share the driver object amongst your workers.
The driver object contains private data about the connection, data that does not make sense outside the process (and is also not serializable).
It is like trying to open a file somewhere and share the file descriptor somewhere else.
It won't work because the file number makes sense only within the process that generates it.
If you want your workers to access the database or any other network resource, you should give them the directions to connect to the resource.
In your case, you should not pass the global_driver as a parameter but rather the connection parameters and let each worker call get_driver to get its own driver.
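A minimal sketch of that change, reusing get_driver, load_data and add_data from above and assuming the same connection parameters (adjust host, port and credentials as needed):

import dask

def process_language_files(language_code, connection_params):
    """Same work as before, but every task builds its own driver from plain, picklable parameters."""
    driver = get_driver(**connection_params)   # created inside the worker, never serialized
    try:
        edges, features, targets, reading_results = load_data(language_code)
        writing_results = add_data(language_code, edges, features, targets, driver)
    finally:
        driver.close()                         # release the connection pool
    return {"reading_results": reading_results, "writing_results": writing_results}

# Only plain data crosses the task boundary.
connection_params = {'uri_scheme': 'bolt', 'host': 'localhost', 'port': '8687',
                     'username': 'neo4j', 'password': 'abc123'}

res = [dask.delayed(process_language_files)(code, connection_params)
       for code in ['ENGLISH', 'FRENCH']]
futures = dask.persist(*res)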

Retrained inception_v3 model deployed in Cloud ML Engine always outputs the same predictions

I followed the TensorFlow For Poets codelab for transfer learning using inception_v3. It generates retrained_graph.pb and retrained_labels.txt files, which can be used to make predictions locally (running label_image.py).
Then, I wanted to deploy this model to Cloud ML Engine, so that I could make online predictions. For that, I had to export the retrained_graph.pb to SavedModel format. I managed to do it by following the indications in this answer from Google's @rhaertel80 and this python file from the Flowers Cloud ML Engine Tutorial. Here is my code:
import tensorflow as tf
from tensorflow.contrib import layers

from tensorflow.python.saved_model import builder as saved_model_builder
from tensorflow.python.saved_model import signature_constants
from tensorflow.python.saved_model import signature_def_utils
from tensorflow.python.saved_model import tag_constants
from tensorflow.python.saved_model import utils as saved_model_utils

export_dir = '../tf_files/saved7'
retrained_graph = '../tf_files/retrained_graph2.pb'
label_count = 5

def build_signature(inputs, outputs):
    signature_inputs = { key: saved_model_utils.build_tensor_info(tensor) for key, tensor in inputs.items() }
    signature_outputs = { key: saved_model_utils.build_tensor_info(tensor) for key, tensor in outputs.items() }

    signature_def = signature_def_utils.build_signature_def(
        signature_inputs,
        signature_outputs,
        signature_constants.PREDICT_METHOD_NAME
    )
    return signature_def

class GraphReferences(object):
    def __init__(self):
        self.examples = None
        self.train = None
        self.global_step = None
        self.metric_updates = []
        self.metric_values = []
        self.keys = None
        self.predictions = []
        self.input_jpeg = None

class Model(object):
    def __init__(self, label_count):
        self.label_count = label_count

    def build_image_str_tensor(self):
        image_str_tensor = tf.placeholder(tf.string, shape=[None])

        def decode_and_resize(image_str_tensor):
            return image_str_tensor

        image = tf.map_fn(
            decode_and_resize,
            image_str_tensor,
            back_prop=False,
            dtype=tf.string
        )

        return image_str_tensor

    def build_prediction_graph(self, g):
        tensors = GraphReferences()
        tensors.examples = tf.placeholder(tf.string, name='input', shape=(None,))
        tensors.input_jpeg = self.build_image_str_tensor()

        keys_placeholder = tf.placeholder(tf.string, shape=[None])
        inputs = {
            'key': keys_placeholder,
            'image_bytes': tensors.input_jpeg
        }

        keys = tf.identity(keys_placeholder)
        outputs = {
            'key': keys,
            'prediction': g.get_tensor_by_name('final_result:0')
        }

        return inputs, outputs

    def export(self, output_dir):
        with tf.Session(graph=tf.Graph()) as sess:
            with tf.gfile.GFile(retrained_graph, "rb") as f:
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(f.read())
                tf.import_graph_def(graph_def, name="")

            g = tf.get_default_graph()
            inputs, outputs = self.build_prediction_graph(g)

            signature_def = build_signature(inputs=inputs, outputs=outputs)
            signature_def_map = {
                signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature_def
            }

            builder = saved_model_builder.SavedModelBuilder(output_dir)
            builder.add_meta_graph_and_variables(
                sess,
                tags=[tag_constants.SERVING],
                signature_def_map=signature_def_map
            )
            builder.save()

model = Model(label_count)
model.export(export_dir)
This code generates a saved_model.pb file, which I then used to create the Cloud ML Engine model. I can get predictions from this model using gcloud ml-engine predict --model my_model_name --json-instances request.json, where the contents of request.json are:
{ "key": "0", "image_bytes": { "b64": "jpeg_image_base64_encoded" } }
However, no matter which jpeg I encode in the request, I always get the exact same wrong predictions:
[Prediction output screenshot]
I guess the problem is in the way the Cloud ML Prediction API passes the base64-encoded image bytes to the input tensor "DecodeJpeg/contents:0" of inception_v3 (the build_image_str_tensor() method in the previous code). Any clue on how I can solve this issue and have my locally retrained model serve correct predictions on Cloud ML Engine?
(Just to make it clear, the problem is not in retrained_graph.pb, as it makes correct predictions when I run it locally; nor is it in request.json, because the same request file worked without problems when following the Flowers Cloud ML Engine Tutorial mentioned above.)
First, a general warning. The TensorFlow for Poets codelab was not written in a way that is very amenable to production serving (partly manifested by the workarounds you are having to implement). You would normally export a prediction-specific graph that doesn't contain all of the extra training ops. So while we can try and hack something together that works, extra work may be needed to productionize this graph.
The approach of your code appears to be to import one graph, add some placeholders, and then export the result. This is generally fine. However, in the code shown in the question, you are adding input placeholders without actually connecting them to anything in the imported graph. You end up with a graph containing multiple disconnected subgraphs, something like (excuse the crude diagram):
image_str_tensor [input=image_bytes] -> <nothing>
keys_placeholder [input=key] -> identity [output=key]
inception_subgraph -> final_graph [output=prediction]
By inception_subgraph I mean all of the ops that you are importing.
So image_bytes is effectively a no-op and is ignored; key gets passed through; and prediction contains the result of running the inception_subgraph. Since it isn't using the input you are passing, it returns the same result every time (though I admit I actually expected an error here).
To address this problem, we would need to connect the placeholder you've created to the one that already exists in inception_subgraph to create a graph more or less like this:
image_str_tensor [input=image_bytes] -> inception_subgraph -> final_graph [output=prediction]
keys_placeholder [input=key] -> identity [output=key]
Note that image_str_tensor is going to be a batch of images, as required by the prediction service, but the inception graph's input is actually a single image. In the interest of simplicity, we're going to address this in a hacky way: we'll assume we'll be sending images one-by-one. If we ever send more than one image per request, we'll get errors. Also, batch prediction will never work.
The main change you need is in the tf.import_graph_def call, which uses input_map to connect the placeholder we've added to the existing input of the imported graph (you'll also see the code for changing the shape of the input).
Putting it all together, we get something like:
import tensorflow as tf
from tensorflow.contrib import layers

from tensorflow.python.saved_model import builder as saved_model_builder
from tensorflow.python.saved_model import signature_constants
from tensorflow.python.saved_model import signature_def_utils
from tensorflow.python.saved_model import tag_constants
from tensorflow.python.saved_model import utils as saved_model_utils

export_dir = '../tf_files/saved7'
retrained_graph = '../tf_files/retrained_graph2.pb'
label_count = 5

class Model(object):
    def __init__(self, label_count):
        self.label_count = label_count

    def build_prediction_graph(self, g):
        inputs = {
            'key': keys_placeholder,
            'image_bytes': tensors.input_jpeg
        }

        keys = tf.identity(keys_placeholder)
        outputs = {
            'key': keys,
            'prediction': g.get_tensor_by_name('final_result:0')
        }

        return inputs, outputs

    def export(self, output_dir):
        with tf.Session(graph=tf.Graph()) as sess:
            # This will be our input that accepts a batch of inputs
            image_bytes = tf.placeholder(tf.string, name='input', shape=(None,))
            # Force it to be a single input; will raise an error if we send a batch.
            coerced = tf.squeeze(image_bytes)
            # When we import the graph, we'll connect `coerced` to `DecodeJPGInput:0`
            input_map = {'DecodeJPGInput:0': coerced}

            with tf.gfile.GFile(retrained_graph, "rb") as f:
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(f.read())
                tf.import_graph_def(graph_def, input_map=input_map, name="")

            keys_placeholder = tf.placeholder(tf.string, shape=[None])

            inputs = {'image_bytes': image_bytes, 'key': keys_placeholder}

            keys = tf.identity(keys_placeholder)
            outputs = {
                'key': keys,
                'prediction': tf.get_default_graph().get_tensor_by_name('final_result:0')
            }

            tf.saved_model.simple_save(sess, output_dir, inputs, outputs)

model = Model(label_count)
model.export(export_dir)
I believe that your error is quite simple to solve:
{ "key": "0", "image_bytes": { "b64": "jpeg_image_base64_encoded" } }
You used " to specify what, I believe, is a string. By doing that, your program is reading jpeg_image_base64_encoded instead of the actual value of the variable.
That's why you get always the same prediction.
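For reference, a minimal sketch of building request.json with an actual base64 payload (the image path is just a placeholder):

import base64
import json

with open('test_image.jpg', 'rb') as f:                   # placeholder image path
    encoded = base64.b64encode(f.read()).decode('utf-8')

instance = {"key": "0", "image_bytes": {"b64": encoded}}

with open('request.json', 'w') as f:
    json.dump(instance, f)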
For anyone working on deploying TensorFlow image-based models on Google Cloud ML, in particular trying to get the base64 encoding working for images (as discussed in this question), I'd recommend also having a look at the following repo that I put together. I spent a lot of time working through the deployment process and was only able to find partial information across the web and on stack overflow. This repo has a full working version of deploying a TensorFlow tf.keras model onto google cloud ML and I think it will be of help to people who are facing the same challenges I faced. Here's the github link:
https://github.com/mhwilder/tf-keras-gcloud-deployment.
The repo covers the following topics:
Training a fully convolutional tf.keras model locally (mostly just to have a model for testing the next parts)
Example code for exporting models that work with the Cloud ML Engine
Three model versions that accept different JSON input types (1. An image converted to a simple list string, 2. An image converted to a base64 encoded string, and 3. A URL that points to an image in a Google Storage bucket)
Instructions and references for general Google Cloud Platform setup
Code for preparing the input JSON files for the 3 different input types
Google Cloud ML model and version creation instructions from the console
Examples using the Google Cloud SDK to call predict on the models

Kivy - threads, queues, clocks and Python sockets

I'm brand new to Kivy, and also new to GUI, but not new to programming.
I am completely missing the boat, the canoe, and the airplane on using Kivy.
In 30 years of programming, from machine code, assembly, Fortran, C, C++, Java, and Python, I've never tried to use a language such as Kivy whose documentation is this thin, because it's so new. I know it'll get better, but I'm trying to use it now.
In my code, I'm trying to implement Queueing, so that I can obtain Python socket data. In normal Python programming, I would have IPC via a Queue - put data in, get data out.
I understand about Kivy, mostly from what I've read in various forums (I can't say I've found it in the documentation at kivy.org), that:
Kivy needs to be in its own thread.
Nothing in Kivy should sleep.
Nothing in Kivy should do blocking IO.
After a LOT of Google searching, the only thing I've actually found that approaches being useful is an informative note here on Stack Overflow. However, while it almost solves my problem, the answer assumes I know more about Kivy than I do; I don't know how to incorporate the answer.
If someone could take the time to put together a COMPLETE short demo of using that example, or one of your own unique COMPLETE answers, I would much appreciate it!
Here's some short code I put together, but it doesn't work, because it blocks on the get() call.
from Queue import Queue

from kivy.lang import Builder
from kivy.app import App
from kivy.uix.boxlayout import BoxLayout
from kivy.properties import StringProperty
from kivy.clock import Clock

from threading import Thread


class ClockedQueue(BoxLayout):
    text1 = StringProperty("Start")

    def __init__(self):
        super(ClockedQueue,self).__init__()
        self.q = Queue()
        self.i=0
        Clock.schedule_interval(self.get, 2)

    def get(self,dt):
        print("get entry")
        val = self.q.get()
        print(self.i + val)
        self.i += 1


class ClockedQueueApp(App):
    def build(self):
        return ClockedQueue()


class SourceQueue(Queue):
    def __init__(self):
        q = Queue()
        for word in ['First','Second']:
            q.put(word)
        print("SourceQueue finished.")


def main():
    th = Thread(target=SourceQueue)
    th.start()

    ClockedQueueApp().run()

    return 0

if __name__ == '__main__':
    main()
Thanks!
Here's some short code I put together, but it doesn't work, because it blocks on the get() call.
So what you really want to do is get items from your queue in a non-blocking way?
There are multiple ways to do this. The simplest seems to be to just check if the queue has any items before getting one - Queue has several methods that help with this, including checking if it is empty or setting whether get is allowed to be blocking (by setting its first argument to False). If you just do this instead of calling get on its own, you won't block things waiting for the queue to have any items - if it's empty or you can't immediately get anything, you just do nothing.
I don't know what you want to do with the items you get from the queue, but if it's short operations that don't take long then you won't need anything more than this. For instance, you could Clock.schedule_interval the get method to happen every frame, do nothing if the queue is empty, or operate on the data if you get something back. No blocking, and no messing with your own threads.
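A minimal sketch of that approach (illustrative names, Python 2 imports to match your code):

from Queue import Queue, Empty

from kivy.app import App
from kivy.uix.label import Label
from kivy.clock import Clock

q = Queue()


class DemoApp(App):
    def build(self):
        self.label = Label(text="waiting...")
        # Poll the queue every frame; the callback never blocks.
        Clock.schedule_interval(self.poll_queue, 0)
        return self.label

    def poll_queue(self, dt):
        try:
            item = q.get(False)          # non-blocking get
        except Empty:
            return                       # nothing yet, try again next frame
        self.label.text = str(item)      # short work is fine on the Kivy thread


if __name__ == '__main__':
    q.put("hello from the queue")
    DemoApp().run()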
You can also create your own thread and run the blocking code in it, which is a general way to deal with blocking issues, especially tasks that can't be split up into short sections that can be performed between frames. I don't know the details of what you're doing, but it should just involve using Python threads normally. You can check the source of Kivy's UrlRequest for an example; it downloads a web source in a background thread.
Edit: Also your SourceQueue is messed up (you override its __init__ to make a new queue that you don't store anywhere), and your clock scheduling has a meaningless third argument false which isn't even defined. I don't know what's going on here, it probably affects what you're trying to do, but doesn't matter to my general answer above.
I was finally able to create something that worked.
Thanks everyone for your suggestions!
Here's the code (because I'm new, Stackoverflow wouldn't let me post it as answering my own question until 5:00 AM)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
#  threads_and_kivy.py
#
'''threads_and_kivy.py

Trying to build up a foundation that satisfies the following:
- has a thread that will implement code that:
  - simulates reading data from a Python socket
  - works on the data
  - puts the data onto a Python Queue
- has a Kivy mainthread that:
  - via class ShowGUI
  - reads data from the Queue
  - updates a class variable of type StringProperty so it will
    update the label_text property.

'''
from threading import Thread
from Queue import Queue, Empty
import time

from kivy.app import App
from kivy.uix.boxlayout import BoxLayout
from kivy.properties import StringProperty
from kivy.lang import Builder
from kivy.clock import Clock

kv = '''
<ShowGUI>:
    Label:
        text: str(root.label_text)
'''
Builder.load_string(kv)

q = Queue()

class SimSocket():
    global q

    def __init__(self, queue):
        self.q = queue

    def put_on_queue(self):
        print("<-----..threaded..SimSocket.put_on_queue(): entry")
        for i in range(10):
            print(".....threaded.....SimSocket.put_on_queue(): Loop " + str(i))
            time.sleep(1)  # just here to sim occasional data send
            self.some_data = ["SimSocket.put_on_queue(): Data Loop " + str(i)]
            self.q.put(self.some_data)
        print("..threaded..SimSocket.put_on_queue(): thread ends")

class ShowGUI(BoxLayout):
    label_text = StringProperty("Initial - not data")
    global q

    def __init__(self):
        super(ShowGUI, self).__init__()
        print("ShowGUI.__init__() entry")
        Clock.schedule_interval(self.get_from_queue, 1.0)

    def get_from_queue(self, dt):
        print("---------> ShowGUI.get_from_queue() entry")
        try:
            queue_data = q.get(timeout=5)
            self.label_text = queue_data[0]
            for qd in queue_data:
                print("SimKivy.get_from_queue(): got data from queue: " + qd)
        except Empty:
            print("Error - no data received on queue.")
            print("Unschedule Clock's schedule")
            Clock.unschedule(self.get_from_queue)

class KivyGui(App):
    def build(self):
        return ShowGUI()

def main():
    global q
    ss = SimSocket(q)

    simSocket_thread = Thread(name="simSocket", target=ss.put_on_queue)
    simSocket_thread.start()

    print("Starting KivyGui().run()")
    KivyGui().run()

    return 0

if __name__ == '__main__':
    main()
