Using data dependency for bazel ctx.action.run_shell custom rule - bazel

I am looking at emit_rule example in bazel source tree:
https://github.com/bazelbuild/examples/blob/5a8696429e36090a75eb6fee4ef4e91a3413ef13/rules/shell_command/rules.bzl
I want to add a data dependency to the custom rule. My understanding of dependency attributes documentation calls for data attr label_list to be used, but it does not appear to work?
# This example copied from docs
def _emit_size_impl(ctx):
    """Runs `wc -c` on the `file` attribute and redirects its output to `<name>.pylint`."""
    source = ctx.file.file
    result = ctx.actions.declare_file("%s.pylint" % ctx.attr.name)
    wc_command = "wc -c '%s' > '%s'" % (source.path, result.path)

    # Only `source` is declared as an action input here; the files behind the
    # `data` attribute are not passed to the action.
    ctx.actions.run_shell(
        inputs = [source],
        outputs = [result],
        command = wc_command,
    )
    return [DefaultInfo(files = depset([result]))]
emit_size = rule(
    implementation = _emit_size_impl,
    attrs = {
        # The single file that the implementation runs `wc -c` on.
        "file": attr.label(
            mandatory = True,
            allow_single_file = True,
        ),
        "data": attr.label_list(allow_files = True),
        # ^^^^^^^ Above does not appear to be sufficient to copy data dependency into sandbox
    },
)
With this rule emit_size(name = "my_name", file = "my_file", data = ["my_data"]) I want to see my_data copied to bazel-out/ before running the command. How do I go about doing this?

The data files should be added as inputs to the actions that need those files, e.g. something like this:
def _emit_size_impl(ctx):
    """Prints the `data` files, then writes `wc -c` of `file` to `<name>.pylint`."""
    source = ctx.file.file
    result = ctx.actions.declare_file("%s.pylint" % ctx.attr.name)
    data_files = ctx.files.data

    # Built up front so the command template below stays readable.
    cat_data = "cat " + " ".join([d.path for d in data_files])

    ctx.actions.run_shell(
        # The data files are listed as action inputs alongside the main file.
        inputs = [source] + data_files,
        outputs = [result],
        # For production rules, probably should use ctx.actions.run() and
        # ctx.actions.args():
        # https://bazel.build/rules/lib/Args
        command = "echo data is: ; %s ; wc -c '%s' > '%s'" % (
            cat_data,
            source.path,
            result.path,
        ),
    )
    return [DefaultInfo(files = depset([result]))]
emit_size = rule(
    implementation = _emit_size_impl,
    attrs = {
        # The single file that the implementation runs `wc -c` on.
        "file": attr.label(
            mandatory = True,
            allow_single_file = True,
        ),
        # Extra files; the implementation adds them to the action's inputs.
        "data": attr.label_list(allow_files = True),
    },
)
BUILD:
load(":defs.bzl", "emit_size")
emit_size(
name = "size",
file = "file.txt",
data = ["data1.txt", "data2.txt"],
)
$ bazel build size
INFO: Analyzed target //:size (4 packages loaded, 9 targets configured).
INFO: Found 1 target...
INFO: From Action size.pylint:
data is:
this is data
this is other data
Target //:size up-to-date:
bazel-bin/size.pylint
INFO: Elapsed time: 0.323s, Critical Path: 0.02s
INFO: 2 processes: 1 internal, 1 linux-sandbox.
INFO: Build completed successfully, 2 total actions
$ cat bazel-bin/size.pylint
22 file.txt

Related

Is there a way to pass the 'bazel run' result file to the next rule file in bazel?

I created a first rule file that generates a script using ctx.actions.expand_template, and ran it. I wanted to pass the result of running the first rule with bazel run to a second rule as a resource, but I couldn't get the file generated by bazel run of the first rule from within the second rule.
It does not mean the script created by the first rule, but the file created when the script is executed.
Below example is what I tested.
This is bazel rule file
def _1st_rule_impl(ctx):
    """Expands `my_template` into this target's executable script.

    The "{ARG1}" and "{ARG2}" placeholders in the template are replaced with
    the `my_file_name` and `my_file_content` attribute values.
    """
    ...
    out = ctx.outputs.executable
    template = ctx.file.my_template
    ctx.actions.expand_template(
        output = out,
        template = template,
        substitutions = {
            # Keys must be complete string literals: each one needs its closing quote.
            "{ARG1}": ctx.attr.my_file_name,
            "{ARG2}": ctx.attr.my_file_content,
        },
    )
    return [
        DefaultInfo(
            files = depset([out]),
            runfiles = ctx.runfiles(files = [out]),
        ),
    ]
# NOTE(review): the rule name is kept as written in the question; a Starlark
# identifier cannot actually start with a digit, so real code needs another name.
1st_rule = rule(
    implementation = _1st_rule_impl,
    attrs = {
        # Script template expanded by the implementation.
        "my_template": attr.label(
            allow_single_file = True,
            # External repository labels start with "@".
            default = Label("@my_test//my_rules:my_script.sh.template"),
        ),
        # Substituted for "{ARG1}" in the template.
        "my_file_name": attr.string(
            default = "myfile.txt",
        ),
        # Substituted for "{ARG2}" in the template.
        "my_file_content": attr.string(
            default = "hello world",
        ),
    },
    executable = True,
)
def _2nd_rule_impl(ctx):
    """Skeleton implementation that iterates over the targets listed in `dep`."""
    ...

    # The attribute is declared as "dep" below, so it is read as ctx.attr.dep.
    for dep in ctx.attr.dep:
        #
        # HOW CAN I GET THE RESULT OF `bazel run` THE '1st_rule'?
        # I WANT TO GET THE `myfile.txt` WHICH IS GENERATED BY '1st_rule'
        #
        pass
    return [
        DefaultInfo(
            files = depset([out]),
            runfiles = ctx.runfiles(files = [out]),
        ),
    ]

2nd_rule = rule(
    implementation = _2nd_rule_impl,
    attrs = {
        "dep": attr.label_list(
            mandatory = True,
        ),
        ...
    },
    executable = True,
)
This is BUILD file
1st_rule(
name = "my_1st_rule",
my_file_name = "myfile.txt",
my_file_content = "hello world",
)
2nd_rule(
name = "my_2nd_rule",
dep = [
":my_1st_rule",
],
)
This is template shell script
...
function create_file() {
echo "{ARG2}" > "{ARG1}"
}
...
I tested with the example described above.
There are several ways to access the outputs of dependencies. Perhaps, the simplest is ctx.files. For example, print(ctx.files.dep) in _2nd_rule_impl should show the output of my_1st_rule when my_2nd_rule is analyzed.

How to get bazel genrule to access transitive dependencies?

I have the following in a BUILD file:
proto_library(
    name = "proto_default_library",
    srcs = glob(["*.proto"]),
    visibility = ["//visibility:public"],
    # Labels in external repositories start with "@".
    deps = [
        "@go_googleapis//google/api:annotations_proto",
        "@grpc_ecosystem_grpc_gateway//protoc-gen-openapiv2/options:options_proto",
    ],
)
genrule(
    name = "generate-buf-image",
    srcs = [
        ":buf_yaml",
        ":buf_breaking_image_json",
        ":protos",
    ],
    exec_tools = [
        ":proto_default_library",
        "//buf:generate-buf-image-sh",
        "//buf:generate-buf-image",
    ],
    outs = ["buf-image.json"],
    # "$@" is the genrule Make variable for the single output file.
    cmd = "$(location //buf:generate-buf-image-sh) --buf-breaking-image-json=$(location :buf_breaking_image_json) $(location :protos) >$@",
)
While executing $(location //buf:generate-buf-image-sh), glob(["*.proto"]) of proto_default_library can be seen in the sandbox but the proto files of @go_googleapis//google/api:annotations_proto and @grpc_ecosystem_grpc_gateway//protoc-gen-openapiv2/options:options_proto cannot. The same goes for the dependencies of //buf:generate-buf-image-sh.
Do I need to explicitly list out all transitive dependencies so they can be processed by generate-buf-image? Is there a programmatic way to do that?
Since genrules are pretty generic, a genrule sees only the default provider of a target, which usually just has the main outputs of that target (e.g., for java_library, a jar of the classes of that library, for proto_library, the proto files of that library). So to get more detailed information, you would write a Starlark rule to access more specific providers. For example:
WORKSPACE:
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
http_archive(
name = "rules_proto",
sha256 = "66bfdf8782796239d3875d37e7de19b1d94301e8972b3cbd2446b332429b4df1",
strip_prefix = "rules_proto-4.0.0",
urls = [
"https://mirror.bazel.build/github.com/bazelbuild/rules_proto/archive/refs/tags/4.0.0.tar.gz",
"https://github.com/bazelbuild/rules_proto/archive/refs/tags/4.0.0.tar.gz",
],
)
load("@rules_proto//proto:repositories.bzl", "rules_proto_dependencies", "rules_proto_toolchains")
rules_proto_dependencies()
rules_proto_toolchains()
defs.bzl:
def _my_rule_impl(ctx):
    """Runs `_tool` with the output path followed by the transitive sources of each `srcs` target."""
    out = ctx.actions.declare_file(ctx.attr.name + ".txt")

    # The output path is the tool's first argument; proto files follow it.
    tool_args = ctx.actions.args()
    tool_args.add(out)

    transitive_protos = []
    for target in ctx.attr.srcs:
        sources = target[ProtoInfo].transitive_sources
        tool_args.add_all(sources)
        transitive_protos.append(sources)

    ctx.actions.run(
        executable = ctx.attr._tool.files_to_run,
        arguments = [tool_args],
        inputs = depset(transitive = transitive_protos),
        outputs = [out],
    )
    return DefaultInfo(files = depset([out]))
my_rule = rule(
    implementation = _my_rule_impl,
    attrs = {
        # Only targets that provide ProtoInfo are accepted.
        "srcs": attr.label_list(
            providers = [ProtoInfo],
        ),
        # Tool run by the implementation's action.
        "_tool": attr.label(
            default = "//:tool",
        ),
    },
)
ProtoInfo is here: https://bazel.build/rules/lib/ProtoInfo
BUILD:
load(":defs.bzl", "my_rule")
proto_library(
name = "proto_a",
srcs = ["proto_a.proto"],
deps = [":proto_b"],
)
proto_library(
name = "proto_b",
srcs = ["proto_b.proto"],
deps = [":proto_c"],
)
proto_library(
name = "proto_c",
srcs = ["proto_c.proto"],
)
my_rule(
name = "foo",
srcs = [":proto_a"],
)
sh_binary(
name = "tool",
srcs = ["tool.sh"],
)
proto_a.proto:
package my_protos_a;
message ProtoA {
optional int32 a = 1;
}
proto_b.proto:
package my_protos_b;
message ProtoB {
optional int32 b = 1;
}
proto_c.proto:
package my_protos_c;
message ProtoC {
optional int32 c = 1;
}
tool.sh:
output=$1
shift
echo input protos: $@ > $output
$ bazel build foo
INFO: Analyzed target //:foo (40 packages loaded, 172 targets configured).
INFO: Found 1 target...
Target //:foo up-to-date:
bazel-bin/foo.txt
INFO: Elapsed time: 0.832s, Critical Path: 0.02s
INFO: 5 processes: 4 internal, 1 linux-sandbox.
INFO: Build completed successfully, 5 total actions
$ cat bazel-bin/foo.txt
input protos: proto_a.proto proto_b.proto proto_c.proto

How do I get the files in the build directory in another bazel rule

I use a Python tool to generate .cpp/.hpp code, like the protobuf tool does, but I don't know how many files will be generated, so it's not quite the same as the protobuf tool.
In one genrule:
def __generate_core_ifce_impl(ctx):
    """Runs the generator tool over `srcs`, writing into one declared output directory."""
    ...
    output_file = ctx.actions.declare_directory(out)

    # Formatting the File list itself would embed its repr in the command, so
    # pass the paths instead.
    # NOTE(review): assumes the tool accepts space-separated inputs after -i; confirm.
    src_paths = " ".join([f.path for f in ctx.files.srcs])
    cmd = """
mkdir -p {path};
""".format(path = output_file.path)
    cmd += """
{tools} -i {src} -o {output_dir}
""".format(tools = tools, src = src_paths, output_dir = output_file.path)
    ctx.actions.run_shell(
        command = cmd,
        inputs = ctx.files.srcs,
        outputs = [output_file],
    )
    return [DefaultInfo(files = depset([output_file]))]
_generate_core_ifce = rule(
    implementation = __generate_core_ifce_impl,
    attrs = {
        "srcs": attr.label_list(mandatory = False, allow_files = True),
        "tools": attr.label_list(mandatory = True, allow_files = True),
        # Was `attr.sting`, which does not exist; the string attribute is attr.string.
        "out": attr.string(mandatory = True),
    },
)
The output_file directory will contain some generated *.cpp and *.hpp files, but I can't know their names in advance.
Then, in another rule, cc_library will use the *.cpp and *.hpp files that are in the output_file directory.
The question is: how do I write this rule?
I can't get the files in the output_file directory,
so I can't write the cc_library?
You should be able to use the name of the target, and the cc_library will use the files that are given in the DefaultInfo, e.g.:
_generate_core_ifce(
name = "my_generate_core_ifce_target",
...
)
cc_library(
name = "my_cc_library_target",
srcs = [":my_generate_core_ifce_target"],
...
)
edit: adding an example:
BUILD:
load(":defs.bzl", "my_rule")
my_rule(
name = "my_target",
)
cc_binary(
name = "cc",
srcs = [":my_target"],
)
defs.bzl:
def _impl(ctx):
    """Generates main.c, mylib.c and mylib.h inside one declared output directory."""
    out_dir = ctx.actions.declare_directory("my_outputs")

    # The placeholder is filled with str.replace(), presumably because the
    # embedded C code contains literal braces.
    script_template = """
mkdir -p {output_dir}
cat > {output_dir}/main.c <<EOF
#include "stdio.h"
#include "mylib.h"
int main() {
printf("hello world %d\\n", get_num());
return 0;
}
EOF
cat > {output_dir}/mylib.c <<EOF
int get_num() {
return 42;
}
EOF
cat > {output_dir}/mylib.h <<EOF
int get_num();
EOF
"""
    ctx.actions.run_shell(
        command = script_template.replace("{output_dir}", out_dir.path),
        outputs = [out_dir],
    )
    return [DefaultInfo(files = depset([out_dir]))]

my_rule = rule(
    implementation = _impl,
)
usage:
$ bazel run cc
Starting local Bazel server and connecting to it...
INFO: Analyzed target //:cc (15 packages loaded, 57 targets configured).
INFO: Found 1 target...
Target //:cc up-to-date:
bazel-bin/cc
INFO: Elapsed time: 3.626s, Critical Path: 0.06s
INFO: 8 processes: 4 internal, 4 linux-sandbox.
INFO: Build completed successfully, 8 total actions
INFO: Build completed successfully, 8 total actions
hello world 42

cc_binary() cannot be called during the analysis phase

I've tried to convert a bazel macro to a rule, so its parsing is now done at analysis time rather than at loading time; this makes calling native.cc_binary impossible.
def _emcc_binary(ctx):
    """Derives emcc outputs and link options from the target name and attributes.

    NOTE: this still calls native.cc_binary / native.genrule from a rule
    implementation, which Bazel rejects with "cc_binary() cannot be called
    during the analysis phase"; those calls only work from a macro.
    NOTE(review): block nesting was reconstructed from a flattened listing; confirm.
    """
    includejs = False
    includehtml = False
    linkopts = list(ctx.attr.linkopts)
    linkopts.append("-s EXTRA_EXPORTED_RUNTIME_METHODS='[\"ccall\", \"cwrap\"]'")

    # The target name's extension selects which outputs are produced.
    if ctx.attr.name.endswith(".html"):
        basename = ctx.attr.name[:-5]
        includehtml = True
        includejs = True
    elif ctx.attr.name.endswith(".js"):
        basename = ctx.attr.name[:-3]
        includejs = True
    outputs = []
    if includejs:
        outputs.append(basename + ".js")
        if ctx.attr.wasm:
            outputs.append(basename + ".wasm")
        if ctx.attr.memory_init_file:
            outputs.append(basename + ".mem")
        if ctx.attr.worker:
            outputs.append(basename + ".worker.js")
            linkopts.append("--proxy-to-worker")
    if includehtml:
        outputs.append(basename + ".html")
    if not ctx.attr.wasm:
        linkopts.append("-s WASM=0")
        linkopts.append("--memory-init-file %d" % ctx.attr.memory_init_file)
    if includejs:
        tarfile = ctx.attr.name + ".tar"

        # we'll generate a tarfile and extract multiple outputs
        native.cc_binary(name = tarfile, linkopts = linkopts, **ctx.attr.kwargs)
        native.genrule(
            name = "emcc_extract_" + tarfile,
            srcs = [tarfile],
            outs = outputs,
            output_to_bindir = 1,
            testonly = ctx.attr.kwargs.get("testonly"),
            # "$(@D)" is the genrule Make variable for the output directory.
            cmd = """
tar xvf $< -C "$(@D)"/$$(dirname "%s")
""" % [outputs[0]],
        )
    else:
        native.cc_binary(name = ctx.attr.name, linkopts = linkopts, **ctx.attr.kwargs)
# we'll generate a tarfile and extract multiple outputs
emcc_binary = rule(
    implementation = _emcc_binary,
    attrs = {
        # Output selection flags read by the implementation.
        "memory_init_file": attr.int(default = 0),
        "wasm": attr.bool(default = True),
        "worker": attr.bool(default = False),
        "srcs": attr.label_list(
            allow_files = True,
        ),
        "linkopts": attr.string_list(),
        # Not read by _emcc_binary as shown.
        "noop": attr.bool(default = False),
        # Forwarded as keyword arguments to the native.cc_binary calls.
        "kwargs": attr.label_keyed_string_dict(),
    },
)
output:
cc_binary() cannot be called during the analysis phase
ERROR: Analysis of target '//tests:hi.js' failed; build aborted: Analysis of target '//tests:hi.js' failed; build aborted
Correct, rules cannot be called from the implementations of other rules. Typically the solution is to use both rules and macros, where the macro creates and wires up the other rules (including your custom Starlark rules) where necessary.
Looking over your code, it's not clear that you really need to change the macro to a rule. Might be helpful if you could describe your end goal (feel free to reach out to bazel-discuss if you'd like more help here). That said, if you really wanted, you can replace native.genrule with actions and native.cc_binary with cc_common, which are designed to work with rules.

Idiomatic retrieval of the Bazel execution path

I'm working on my first custom Bazel rules. The rules allow the running of bats command line tests.
I've included the rule definition below verbatim. I'm pretty happy with it so far but there's one part which feels really ugly and non-standard. If the rule user adds a binary dependency to the rule then I make sure that the binary appears on the PATH so that it can be tested. At the moment I do this by making a list of the binary paths and then appending them with $PWD which is expanded inside the script to the complete execution path. This feels hacky and error prone.
Is there a more idiomatic way to do this? I don't believe I can access the execution path in the rule due to it not being created until the execution phase.
Thanks for your help!
# BUILD file content for the external bats repository; passed as
# `build_file_content` by bats_repositories(). It exposes libexec/bats as the
# public `bats` sh_binary with the other libexec scripts as data.
BATS_REPOSITORY_BUILD_FILE = """
package(default_visibility = [ "//visibility:public" ])
sh_binary(
name = "bats",
srcs = ["libexec/bats"],
data = [
"libexec/bats-exec-suite",
"libexec/bats-exec-test",
"libexec/bats-format-tap-stream",
"libexec/bats-preprocess",
],
)
"""
def bats_repositories(version = "v0.4.0"):
    """Declares the external `bats` git repository at tag `version`.

    Args:
      version: git tag of https://github.com/sstephenson/bats to check out.
    """
    native.new_git_repository(
        name = "bats",
        tag = version,
        remote = "https://github.com/sstephenson/bats",
        build_file_content = BATS_REPOSITORY_BUILD_FILE,
    )
# Template for the generated test script. Placeholders: {bats_bins_path} is
# prepended to PATH, {bats} is the bats launcher, {test_paths} the test files.
# The shebang must be the very first bytes of the script, so the literal
# starts directly after the opening quotes rather than with a blank line.
BASH_TEMPLATE = """#!/usr/bin/env bash
set -e
export TMPDIR="$TEST_TMPDIR"
export PATH="{bats_bins_path}":$PATH
"{bats}" "{test_paths}"
"""
def _dirname(path):
    """Returns the part of `path` before its last "/", without trailing slashes.

    Returns "" when `path` contains no "/".
    """
    parent = path.rpartition("/")[0]
    return parent.rstrip("/")
def _bats_test_impl(ctx):
    """Writes the test launcher script and returns the runfiles it needs."""
    test_paths = [src.short_path for src in ctx.files.srcs]

    # "$PWD" is left literal here and expanded by the generated script when it runs.
    bin_dirs = ["$PWD/" + _dirname(dep.short_path) for dep in ctx.files.deps]
    path_sep = ctx.configuration.host_path_separator

    script = BASH_TEMPLATE.format(
        bats = ctx.executable._bats.short_path,
        test_paths = " ".join(test_paths),
        bats_bins_path = path_sep.join(bin_dirs),
    )
    ctx.file_action(
        output = ctx.outputs.executable,
        executable = True,
        content = script,
    )

    # Test sources plus collected data, merged with the bats tool's own runfiles.
    base_runfiles = ctx.runfiles(
        files = ctx.files.srcs,
        collect_data = True,
    )
    return DefaultInfo(
        runfiles = base_runfiles.merge(ctx.attr._bats.default_runfiles),
    )
bats_test = rule(
    attrs = {
        # Test files passed to bats.
        "srcs": attr.label_list(
            allow_files = True,
        ),
        # Targets whose output directories are put on PATH by the test script.
        "deps": attr.label_list(),
        "_bats": attr.label(
            # External repository labels start with "@".
            default = Label("@bats//:bats"),
            executable = True,
            cfg = "host",
        ),
    },
    test = True,
    implementation = _bats_test_impl,
)
This should be easy to support from Bazel 0.8.0 which will be released in ~2 weeks.
In your skylark implementation you should do ctx.expand_location(binary) where binary should be something like $(execpath :some-label) so you might want to just format the label you got from the user with the $(execpath) and bazel will make sure to give you the execution location of that label.
Some relevant resources:
$location expansion in Bazel
https://github.com/bazelbuild/bazel/issues/2475
https://github.com/bazelbuild/bazel/commit/cff0dc94f6a8e16492adf54c88d0b26abe903d4c

Resources