Temporarily modify the current process's environment

I use the following code to temporarily modify environment variables.
import os
from contextlib import contextmanager


@contextmanager
def _setenv(**mapping):
    """``with`` context to temporarily modify the environment variables"""
    backup_values = {}
    backup_remove = set()
    for key, value in mapping.items():
        if key in os.environ:
            backup_values[key] = os.environ[key]
        else:
            backup_remove.add(key)
        os.environ[key] = value
    try:
        yield
    finally:
        # restore old environment
        for k, v in backup_values.items():
            os.environ[k] = v
        for k in backup_remove:
            del os.environ[k]
This ``with`` context manager is mainly used in test cases. For example:
def test_myapp_respects_this_envvar():
    with _setenv(MYAPP_PLUGINS_DIR='testsandbox/plugins'):
        myapp.plugins.register()
        [...]
My question: is there a simpler/more elegant way to write _setenv? I thought about doing backup = os.environ.copy() and then os.environ = backup ... but I am not sure whether that would affect the program's behavior (e.g. if os.environ is referenced elsewhere in the Python interpreter).
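For illustration, here is a minimal sketch of that concern (names are purely illustrative): rebinding os.environ only changes which object the name points to, so code that already holds a reference keeps the old mapping, and a plain dict no longer calls putenv() for child processes.
import os

env_ref = os.environ          # someone else keeps a reference to the real mapping
os.environ = dict(env_ref)    # rebinding the name to a plain dict
os.environ["DEMO_VAR"] = "x"  # no longer propagated via putenv()
print(env_ref is os.environ)  # False: the two names now refer to different objects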

I suggest the following implementation:
import contextlib
import os


@contextlib.contextmanager
def set_env(**environ):
    """
    Temporarily set the process environment variables.

    >>> with set_env(PLUGINS_DIR=u'test/plugins'):
    ...     "PLUGINS_DIR" in os.environ
    True
    >>> "PLUGINS_DIR" in os.environ
    False

    :type environ: dict[str, unicode]
    :param environ: Environment variables to set
    """
    old_environ = dict(os.environ)
    os.environ.update(environ)
    try:
        yield
    finally:
        os.environ.clear()
        os.environ.update(old_environ)
EDIT: more advanced implementation
The context manager below can be used to add/remove/update your environment variables:
import contextlib
import os


@contextlib.contextmanager
def modified_environ(*remove, **update):
    """
    Temporarily updates the ``os.environ`` dictionary in-place.

    The ``os.environ`` dictionary is updated in-place so that the modification
    is sure to work in all situations.

    :param remove: Environment variables to remove.
    :param update: Dictionary of environment variables and values to add/update.
    """
    env = os.environ
    update = update or {}
    remove = remove or []

    # List of environment variables being updated or removed.
    stomped = (set(update.keys()) | set(remove)) & set(env.keys())
    # Environment variables and values to restore on exit.
    update_after = {k: env[k] for k in stomped}
    # Environment variables and values to remove on exit.
    remove_after = frozenset(k for k in update if k not in env)

    try:
        env.update(update)
        for k in remove:
            env.pop(k, None)
        yield
    finally:
        env.update(update_after)
        for k in remove_after:
            del env[k]
Usage examples:
>>> with modified_environ('HOME', LD_LIBRARY_PATH='/my/path/to/lib'):
...     home = os.environ.get('HOME')
...     path = os.environ.get("LD_LIBRARY_PATH")
>>> home is None
True
>>> path
'/my/path/to/lib'
>>> home = os.environ.get('HOME')
>>> path = os.environ.get("LD_LIBRARY_PATH")
>>> home is None
False
>>> path is None
True
EDIT2
A demonstration of this context manager is available on GitHub.

_environ = dict(os.environ)  # or os.environ.copy()
try:
    ...
finally:
    os.environ.clear()
    os.environ.update(_environ)

I was looking to do the same thing, but for unit testing. Here is how I have done it using the unittest.mock.patch function:
def test_function_with_different_env_variable(self):
    with mock.patch.dict('os.environ', {'hello': 'world'}, clear=True):
        self.assertEqual(os.environ.get('hello'), 'world')
        self.assertEqual(len(os.environ), 1)
Basically, using unittest.mock.patch.dict with clear=True makes os.environ a dictionary containing solely {'hello': 'world'} for the duration of the with block.
Removing clear=True keeps the original os.environ and only adds/replaces the specified key/value pairs from {'hello': 'world'}.
Passing an empty dictionary together with clear=True leaves os.environ empty within the with block.
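For comparison, a minimal sketch of the same patch without clear=True (it assumes the method lives in a unittest.TestCase and that 'hello' is not already set in the real environment):
def test_function_with_extra_env_variable(self):
    with mock.patch.dict('os.environ', {'hello': 'world'}):
        # The patched key is visible...
        self.assertEqual(os.environ.get('hello'), 'world')
        # ...and the original variables are still there (PATH is assumed to exist).
        self.assertIn('PATH', os.environ)
    # On exit the original environment is restored.
    self.assertNotIn('hello', os.environ)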

In pytest you can temporarily set an environment variable using the monkeypatch fixture. See the docs for details. I've copied a snippet here for your convenience.
import os

import pytest
from typing import Any, NewType

# Alias for the ``type`` of the monkeypatch fixture.
MonkeyPatchFixture = NewType("MonkeyPatchFixture", Any)


# This is the function we will test below to demonstrate the ``monkeypatch`` fixture.
def get_lowercase_env_var(env_var_name: str) -> str:
    """
    Return the value of an environment variable. Variable value is made all lowercase.

    :param env_var_name:
        The name of the environment variable to return.
    :return:
        The value of the environment variable, with all letters in lowercase.
    """
    env_variable_value = os.environ[env_var_name]
    lowercase_env_variable = env_variable_value.lower()
    return lowercase_env_variable


def test_get_lowercase_env_var(monkeypatch: MonkeyPatchFixture) -> None:
    """
    Test that the function under test indeed returns the lowercase-ified
    form of ENV_VAR_UNDER_TEST.
    """
    name_of_env_var_under_test = "ENV_VAR_UNDER_TEST"
    env_var_value_under_test = "EnvVarValue"
    expected_result = "envvarvalue"

    # KeyError because ``ENV_VAR_UNDER_TEST`` was looked up in the os.environ
    # dictionary before its value was set by ``monkeypatch``.
    with pytest.raises(KeyError):
        assert get_lowercase_env_var(name_of_env_var_under_test) == expected_result

    # Temporarily set the environment variable's value.
    monkeypatch.setenv(name_of_env_var_under_test, env_var_value_under_test)
    assert get_lowercase_env_var(name_of_env_var_under_test) == expected_result


def test_get_lowercase_env_var_fails(monkeypatch: MonkeyPatchFixture) -> None:
    """
    This demonstrates that ENV_VAR_UNDER_TEST is reset in every test function.
    """
    env_var_name_under_test = "ENV_VAR_UNDER_TEST"
    expected_result = "envvarvalue"

    with pytest.raises(KeyError):
        assert get_lowercase_env_var(env_var_name_under_test) == expected_result

For unit testing I prefer using a decorator function with optional parameters. This way I can use the modified environment values for a whole test function. The decorator below also restores the original environment values in case the function raises an Exception:
import os
from functools import wraps


def patch_environ(new_environ=None, clear_orig=False):
    if not new_environ:
        new_environ = dict()

    def actual_decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            original_env = dict(os.environ)
            if clear_orig:
                os.environ.clear()
            os.environ.update(new_environ)
            try:
                return func(*args, **kwargs)
            finally:  # restore even if an Exception was raised
                # Restore in-place rather than rebinding os.environ, so that
                # putenv() keeps being called for child processes.
                os.environ.clear()
                os.environ.update(original_env)

        return wrapper

    return actual_decorator
Usage in unit tests:
import unittest


class Something:
    @staticmethod
    def print_home():
        home = os.environ.get('HOME', 'unknown')
        print("HOME = {0}".format(home))


class SomethingTest(unittest.TestCase):
    @patch_environ({'HOME': '/tmp/test'})
    def test_environ_based_something(self):
        Something.print_home()  # prints: HOME = /tmp/test


unittest.main()

Related

Access stdout of Conducto Exec node

Is there an example I can see of accessing the stdout of an Exec node? I am trying to parse stdout similarly to the Jenkins Groovy DSL:
tobeparsed = sh(returnStdout: true, script: "myscript.sh")
That feature is not available in Conducto. When you want to send data from one node to another, there are a few ways to do so.
In these examples let's try to generate a random number in the first node, and have the second echo it.
A simple option is to use co.data to save the data in the first node, then get it from the second node.
import conducto as co
import random

img = co.Image(copy_dir=".")


def first():
    # Generate a random number and save it to co.data,
    # scoped to the current pipeline.
    number = random.random()
    co.data.pipeline.puts("random_number", str(number).encode())
    print(f"Saved random number to co.data. Value: {number}")


def second():
    # Read the value from co.data
    stored = co.data.pipeline.gets("random_number")
    number = float(stored.decode())
    print(f"Read random number from co.data. Value: {number}")


def main() -> co.Serial:
    with co.Serial(image=img) as output:
        output["first"] = co.Exec("python pipeline.py first")
        output["second"] = co.Exec("python pipeline.py second")
    return output


if __name__ == '__main__':
    co.main(default=main)
A more sophisticated way: the first node could be called from a co.Lazy, and generate the second node once it knows the value.
import conducto as co
import random

img = co.Image(copy_dir=".")


def first() -> co.Serial:
    output = co.Serial()
    # Generate a random number and encode it into the command
    # of the second node.
    output["second"] = co.Exec(f"echo {random.random()}")
    return output


def main() -> co.Serial:
    return co.Lazy("python pipeline.py first", image=img)


if __name__ == '__main__':
    co.main(default=main)
In either case, save that text to a file called pipeline.py. Run it with python pipeline.py to see the tree it would generate. Run python pipeline.py --local to run it on your machine.
There's nothing magical about the name pipeline.py; just make sure it matches the commands in the co.Exec nodes.

Kedro - how to pass nested parameters directly to node

kedro recommends storing parameters in conf/base/parameters.yml. Let's assume it looks like this:
step_size: 1

model_params:
    learning_rate: 0.01
    test_data_ratio: 0.2
    num_train_steps: 10000
And now imagine I have some data_engineering pipeline whose nodes.py has a function that looks something like this:
def some_pipeline_step(num_train_steps):
    """
    Takes the parameter `num_train_steps` as argument.
    """
    pass
How would I go about passing that nested parameter straight to this function in data_engineering/pipeline.py? I unsuccessfully tried:
from kedro.pipeline import Pipeline, node

from .nodes import split_data


def create_pipeline(**kwargs):
    return Pipeline(
        [
            node(
                some_pipeline_step,
                ["params:model_params.num_train_steps"],
                dict(
                    train_x="train_x",
                    train_y="train_y",
                ),
            )
        ]
    )
I know that I could just pass all parameters into the function by using ['parameters'], or pass all model_params parameters with ['params:model_params'], but it seems inelegant and I feel like there must be a better way. Would appreciate any input!
(Disclaimer: I'm part of the Kedro team)
Thank you for your question. The current version of Kedro, unfortunately, does not support nested parameters. The interim solution would be to use top-level keys inside the node (as you already pointed out) or to decorate your node function with some sort of parameter filter, which is not elegant either; a sketch of such a filter is shown below.
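To illustrate that second option, here is a minimal, purely hypothetical sketch of a parameter filter (pick_param is not part of Kedro's API; it assumes the node is registered with the whole "params:model_params" dictionary as its first input):
from functools import wraps


def pick_param(key):
    """Hypothetical decorator: pass only ``params[key]`` on to the node function."""
    def decorator(func):
        @wraps(func)
        def wrapper(params, *args, **kwargs):
            # ``params`` is the whole dictionary received from the catalog,
            # e.g. the value of ``params:model_params``.
            return func(params[key], *args, **kwargs)
        return wrapper
    return decorator


@pick_param("num_train_steps")
def some_pipeline_step(num_train_steps):
    # The node function itself still only sees ``num_train_steps``.
    pass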
Probably the most viable solution would be to customise your ProjectContext class (in src/<package_name>/run.py) by overriding the _get_feed_dict method as follows:
class ProjectContext(KedroContext):
    # ...

    def _get_feed_dict(self) -> Dict[str, Any]:
        """Get parameters and return the feed dictionary."""
        params = self.params
        feed_dict = {"parameters": params}

        def _add_param_to_feed_dict(param_name, param_value):
            """This recursively adds parameter paths to the `feed_dict`,
            whenever `param_value` is a dictionary itself, so that users can
            specify specific nested parameters in their node inputs.

            Example:
                >>> param_name = "a"
                >>> param_value = {"b": 1}
                >>> _add_param_to_feed_dict(param_name, param_value)
                >>> assert feed_dict["params:a"] == {"b": 1}
                >>> assert feed_dict["params:a.b"] == 1
            """
            key = "params:{}".format(param_name)
            feed_dict[key] = param_value

            if isinstance(param_value, dict):
                for key, val in param_value.items():
                    _add_param_to_feed_dict("{}.{}".format(param_name, key), val)

        for param_name, param_value in params.items():
            _add_param_to_feed_dict(param_name, param_value)

        return feed_dict
Please also note that this issue has already been addressed on the develop branch and will become available in the next release. The fix uses the approach from the snippet above.
As mentioned by Dmitry, Kedro 0.16.0 introduced nested parameter values inside the node inputs, which can be accessed via the . operator:
node(func, "params:a.b", None)
whereas Kedro 0.17.6 enabled overriding nested parameters with --params in the CLI, e.g.
kedro run --params="model.model_tuning.booster:gbtree"

Multiple outputs from one input based on features

I would like to build many outputs based on the same input, e.g. a hex and a binary from an elf.
I will do this multiple times, in different places in the wscript, so I'd like to wrap it in a feature.
Ideally something like:
bld(features="hex", source="output.elf")
bld(features="bin", source="output.elf")
How would I go about implementing this?
If your elf files always have the same extension, you can simply use that:
# untested, naive code
from waflib import TaskGen


@TaskGen.extension('.elf')
def process_elf(self, node):  # <- self = task gen, node is the current input node
    if "bin" in self.features:
        bin_node = node.change_ext('.bin')
        self.create_task('make_bin_task', node, bin_node)
    if "hex" in self.features:
        hex_node = node.change_ext('.hex')
        self.create_task('make_hex_task', node, hex_node)
If not, you have to define the features you want, like this:
from waflib import TaskGen


@TaskGen.feature("hex", "bin")  # <- attach method to features hex AND bin
@TaskGen.before('process_source')
def transform_source(self):  # <- here self = task generator
    self.inputs = self.to_nodes(getattr(self, 'source', []))
    self.meths.remove('process_source')  # <- to disable the standard process_source


@TaskGen.feature("hex")  # <- attach method to feature hex
@TaskGen.after('transform_source')
def process_hex(self):
    for i in self.inputs:
        self.create_task("make_hex_task", i, i.change_ext(".hex"))


@TaskGen.feature("bin")  # <- attach method to feature bin
@TaskGen.after('transform_source')
def process_bin(self):
    for i in self.inputs:
        self.create_task("make_bin_task", i, i.change_ext(".bin"))
You have to write the two tasks make_hex_task and make_bin_task yourself. You should put all this in a separate Python file and make a "plugin".
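A minimal sketch of what those two task classes might look like, assuming you convert with objcopy (the OBJCOPY variable is an assumption and would have to be set during configure, e.g. with conf.find_program('objcopy', var='OBJCOPY')):
from waflib import Task


class make_hex_task(Task.Task):
    # Assumes ${OBJCOPY} was configured; produces an Intel HEX file from the elf.
    run_str = '${OBJCOPY} -O ihex ${SRC} ${TGT}'
    color = 'CYAN'


class make_bin_task(Task.Task):
    # Assumes ${OBJCOPY} was configured; produces a raw binary file from the elf.
    run_str = '${OBJCOPY} -O binary ${SRC} ${TGT}'
    color = 'CYAN'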
You can also define a "shortcut" to call:
def build(bld):
    bld.make_bin(source="output.elf")
    bld.make_hex(source="output.elf")
    bld(features="hex bin", source="output.elf")  # when both are needed in the same place
Like that:
from waflib.Configure import conf


@conf
def make_bin(self, *k, **kw):  # <- here self = build context
    kw["features"] = "bin"  # <- you can add bin to existing features kw
    return self(*k, **kw)


@conf
def make_hex(self, *k, **kw):
    kw["features"] = "hex"
    return self(*k, **kw)

What is the difference between defining variables using def and without?

In relation to Jenkins DSL, what is the difference between:
def cwd = pwd()
and
cwd = pwd()
?
It's a difference of scope. When you assign a value to a variable without "def" (or another type) in a Groovy script, it's added to the "binding": the global variables for the script. That means it can be accessed from all functions within the script. It's a lot like having the variable defined at the top of the script.
You could wind up with unexpected behavior if multiple threads are acting on the script.
def a = {
    x = 1
    println x
}
def b = {
    x = 2
    println x
}
new Thread(a).start()
new Thread(b).start()
... could produce two ones, two twos, or a mix.
In contrast, using "def" makes a local variable:
def a = {
    def x = 1
    println x
}
def b = {
    def x = 2
    println x
}
new Thread(a).start()
new Thread(b).start()
... will always print a 1 and a 2, in arbitrary order.
It's a good question, but it's more of a Groovy question.
From what I understand, defining a variable without the def keyword will work from a script, but not if you are in a class method. Example from this blog post:
class MyTest {
    def testMethod() {
        y = 3
        println y
    }
}

t = new MyTest()
t.testMethod()
The variable t will be defined without a problem, but the definition of y will throw an exception.
What this means is that in our context (a Jenkins pipeline) you could always define your variables without the def keyword, because you are always in a script context and your variables will be bound to the script. However, I think it is good practice to use the def keyword: it makes it explicit where your variables are declared, and it can also prevent problems with duplicate variable definitions (with def, compilation will at least fail if you define the same variable twice).
Finally, from Groovy documentation :
When using def in Groovy, the actual type holder is Object (so you can assign any object to variables defined with def, and return any kind of object if a method is declared returning def).
So you might want to be specific and specify the type of the variable you are defining. In your case you could define cwd as:
String cwd = pwd()
It would forbid you from doing things like:
def cwd = pwd()
cwd = 1000 // Valid code
String cwd2 = pwd()
cwd2 = 1000 // Will fail compilation

How in Grails override config variable in external config file so that variables dependant on that variable are updated too?

I have an external and internal config in my grails application:
Config.groovy
root = "/home/baseConf"
test {
    dir = root + "/testDir"
}
External.groovy
root = "/home/externalConf"
Inside a controller I have:
println "${grailsApplication.config.root}"
println "${grailsApplication.config.test.dir}"
What is printed:
/home/externalConf
/home/baseConf/testDir
What I want to be printed:
/home/externalConf
/home/externalConf/testDir
What should I do so that changing this one base variable in the external config file also updates the many variables in Config.groovy that depend on it (as in the example above)? Is something like this even possible?
You need to change your dir variable (inside test). Check the code below.
test {
    dir = "${-> root}/testDir"
}
This change is necessary since you want dir to be evaluated when it is accessed and not when Config is loaded. This is called late binding (lazy evaluation); see Ian Roberts' answer here: Reusing Grails variables inside Config.groovy.
It's important to note that this is a feature of the Groovy language (not Grails).
An eager evaluation strategy can be seen below:
def x = 1
def s = "The value of x is: ${x}"
println s //The value of x is: 1
x = 2
println s //The value of x is: 1
On the other side, a lazy evaluation strategy will evaluate the expression on demand (call-by-need):
def x = 1
def s = "The value of x is: ${-> x}"
println s //The value of x is: 1
x = 2
println s //The value of x is: 2
