We are using Ivy for storing our binaries and managing dependencies.
To manage the impact of changes to modules, we need to gather the following information from the repository:
Given a module name, organization, branch and revision, obtain all modules that are directly or transitively dependent on that module (with branch and revision). Particularly interesting are the impacted "top-level" (application) modules.
Is there any tool suitable for this task? Otherwise, what would you suggest to solve it?
I've tried the repreport task without much success; it doesn't seem suited to browsing dependencies in reverse.
We have Jenkins, and have a bunch of jars we build that other applications depend upon. We use a Maven repository for storing these jars. In Jenkins, a developer can take a particular jar build, and promote that jar into our Maven repo.
The problem is that a promoted jar can break some of the applications that use it. Therefore, I want to be able to build those projects whenever a jar is promoted to our Maven repo. This list gives me the names of the Jenkins projects that depend upon a particular jar.
Now, our projects were built with Ant before we adopted Ivy, so I created several macros that helped developers use Ivy. We have replaced the <jar> task with the <jar.macro/> task. It is like the <jar> task, except that it takes the ivy.xml, converts it into a Maven pom.xml, and embeds it in the jar. The script looks at the build.xml for these <jar.macro> tasks, so it knows the names of the jars each project builds. You'll probably have to munge this to fit your own setup.
The following Perl script goes through our Subversion projects and looks for the ones with an ivy.xml. It goes through each build.xml to see what jars are built, associates them with the project, and then goes through the ivy.xml to see what projects each one depends upon.
You're welcome to use it.
#! /usr/bin/env perl
#
use warnings;
use strict;
use autodie;
use feature qw(say);
use XML::Simple;
use Data::Dumper;
use File::Find;
use Pod::Usage;
use constant {
SVN_REPO => 'http://svn/rsvp',
IVY_XML_FILE => 'ivy.xml',
SVN => '/usr/local/bin/svn',
};
#
# These are projects we don't want to include, but
# they have 'ivy.xml' in them anyway
#
use constant BAD_PROJECTS => qw(
...
);
my %bad_projects = map { $_ => 1 } BAD_PROJECTS;
#
# Find the branch or use trunk (first command-line parameter)
#
my $branch = shift; # This is the branch to search on
if ( not defined $branch ) {
$branch = "trunk";
}
my $branch_url;
if ( $branch eq "trunk" ) {
$branch_url = $branch;
}
else {
$branch_url = "branches/$branch";
}
#
# Use "svn ls" to find all the projects that have an ivy.xml
#
open my $project_fh, "-|", "@{[SVN]} ls @{[SVN_REPO]}/$branch_url";
my %ivy_projects;
say "FINDING IVY PROJECTS";
while ( my $svn_project_name = <$project_fh> ) {
chomp $svn_project_name;
$svn_project_name =~ s|/$||; # Remove the trailing slash
next if exists $bad_projects{$svn_project_name};
#
# See if an ivy.xml file exists in this project via "svn ls"
#
my $svn_ivy_project_url = SVN_REPO . "/$branch_url/$svn_project_name";
my $ivy_file = "$svn_ivy_project_url/ivy.xml";
my $error = system qq( @{[SVN]} ls $ivy_file > /dev/null 2>&1 );
next if $error; # No ivy.xml
say " " x 4 . "Ivy Project: $svn_ivy_project_url";
#
# Ivy project exists. Create a new "project" object to store all the info
#
my $project = Local::Project->new($svn_ivy_project_url);
my $ivy_xml = qx( @{[SVN]} cat $svn_ivy_project_url/ivy.xml );
$project->Ivy_xml( $ivy_xml );
my $build_xml = qx( @{[SVN]} cat $svn_ivy_project_url/build.xml );
$project->Build_xml( $build_xml );
$ivy_projects{ $svn_project_name } = $project;
}
#
# Go through build.xml files and look for all jar.macro tasks. Go through
# these and map the Ivy Artifact Name to the project that builds it.
# The Ivy Artifact Name could be from the ivy.xml file. However, if
# the parameter pom.artifact.name exists, the Ivy Artifact Name will be that.
#
my %jars_to_project_name;
for my $svn_project ( sort keys %ivy_projects ) {
my $project = $ivy_projects{$svn_project};
my $url = $project->Url;
my $build_ref = $project->Build_ref;
my $ivy_ref = $project->Ivy_ref;
my $build_xml_project_name = $build_ref->{name};
say qq(Parsing build.xml of "$svn_project");
#
# Go through all targets looking for jar.macro tasks
#
for my $target ( keys %{ $build_ref->{target} } ) {
next unless $build_ref->{target}->{$target}->{"jar.macro"};
#
# Contains a Jar Macro Task: This could be an array reference?
#
my @jar_macros;
my $jar_macro_task = $build_ref->{target}->{$target}->{"jar.macro"};
if ( ref $jar_macro_task eq "ARRAY" ) {
@jar_macros = @{ $jar_macro_task };
} else {
@jar_macros = ( $jar_macro_task );
}
for my $jar_macro ( @jar_macros ) {
#
# If there is no "pom.artifact.name" in the jar.macro
# task, we need to use the name of the module in the
# ivy.xml file. If pom.artifact.name does exist, we will
# use that. We also need to find out if the name contains
# "${ant.project.name}". If it does, we need to replace that
# name with the name of the build.xml project entity name.
#
my $ivy_jar_name;
if ( not exists $jar_macro->{"pom.artifact.name"} ) {
$ivy_jar_name = $ivy_ref->{info}->{module};
}
else { # Name of jar is in the jar.macro task
$ivy_jar_name = $jar_macro->{"pom.artifact.name"};
my $ant_project_name = $build_ref->{name};
$ivy_jar_name =~ s/\${ant\.project\.name}/$build_xml_project_name/;
}
$jars_to_project_name{$ivy_jar_name} = $svn_project
}
}
}
#
# At this point, we now have all of the information in the ivy.xml file
# and the mapping of artifact names to the Subversion projects they live in.
#
# Now, we need to go through the ivy.xml files, find all com.travelclick
# artifact dependencies, and map them back to the SVN projects.
#
#
# A hash of hashes. This will be keyed by the BASE JAR svn project. The inner
# hash keys are all the other projects that depend upon that BASE JAR svn
# project.
#
my %project_dependencies;
say "MAPPING IVY.XML back to the dependent projects.";
for my $project ( sort keys %ivy_projects ) {
say "On $project";
my $ivy_ref = $ivy_projects{$project}->Ivy_ref;
my $dependencies_ref = $ivy_ref->{dependencies}->{dependency};
for my $dependency ( sort keys %{ $dependencies_ref } ) {
next unless exists $dependencies_ref->{$dependency}->{org};
next unless $dependencies_ref->{$dependency}->{org} eq 'com.travelclick';
#
# This is a TravelClick Dependency. Map this back to the SVN Project
# which produced this jar.
#
# We now have the SVN project that contained the dependent jar and the
# svn project that mentions that jar in the ivy.xml project.
#
my $svn_project = $jars_to_project_name{$dependency};
next if not $svn_project;
if ( not exists $project_dependencies{$svn_project} ) {
$project_dependencies{$svn_project} = {};
}
$project_dependencies{$svn_project}->{$project} = 1;
}
}
for my $project ( sort { lc $a cmp lc $b } keys %project_dependencies ) {
printf "%-30.30s", "$project - ";
say join ( "-$branch,", sort { lc $a cmp lc $b } keys %{ $project_dependencies{$project} } ) . "-$branch";
}
package Local::Project;
use XML::Simple;
sub new {
my $class = shift;
my $project_url = shift;
my $self = {};
bless $self, $class;
$self->Url($project_url);
return $self;
}
sub Url {
my $self = shift;
my $url = shift;
if ( defined $url ) {
$self->{URL} = $url;
}
return $self->{URL};
}
sub Ivy_xml {
my $self = shift;
my $ivy_xml = shift;
if ( defined $ivy_xml ) {
$self->{IVY_XML} = $ivy_xml;
$self->Ivy_ref($ivy_xml); #Generate the ref structure while you're at it.
}
return $self->{IVY_XML};
}
sub Build_xml {
my $self = shift;
my $build_xml = shift;
if ( defined $build_xml ) {
$self->{BUILD_XML} = $build_xml;
$self->Build_ref($build_xml); #Generate the ref structure while you're at it.
}
return $self->{BUILD_XML};
}
sub Ivy_ref {
my $self = shift;
my $ivy_xml = shift;
if ( defined $ivy_xml ) {
$self->{IVY_REF} = XMLin("$ivy_xml");
}
return $self->{IVY_REF};
}
sub Build_ref {
my $self = shift;
my $build_xml = shift;
if ( defined $build_xml ) {
$self->{BUILD_REF} = XMLin("$build_xml");
}
return $self->{BUILD_REF};
}
=pod
=head1 NAME
find_dependencies.pl
=head1 SYNOPSIS
$ find_dependencies.pl [ <branch> ]
Where:
=over 4
=item *
<branch> - Name of the branch. If not given, it is assumed to be trunk.
=back
=head1 DESCRIPTION
This program goes through the Subversion repository looking for
projects that have an C<ivy.xml> file in them. If this file is found,
the project is parsed to see what jars produced by TravelClick it is
dependent upon, and what jars it produces.
Once all of the Subversion projects are parsed, the jars are listed
along with their dependent projects.
The purpose of this program is to build a list of projects that should
be automatically rebuilt when a jar is promoted to the Maven repository.
=head1 PERL MODULE DEPENDENCIES
This project is dependent upon the following Perl modules that must be
installed before this program can be executed:
=over 4
=item XML::Simple
=back
This project must also be executed on Perl 5.12 or greater.
=head1 BUGS
=over 4
=item *
The list assumes Jenkins projects are named after the Subversion
project with the branch or trunk tacked on to the end.
=back
I have the following rule definition:
helm_action = rule(
attrs = {
…
"cluster_aliases": attr.string_dict(
doc = "key value pair matching for creating a cluster alias where the name used to evoke a cluster alias is different than the actual cluster's name",
default = DEFAULT_CLUSTER_ALIASES,
),
…
},
…
)
I'd like the DEFAULT_CLUSTER_ALIASES value to be based on the host OS, but
DEFAULT_CLUSTER_ALIASES = {
"local": select({
"#platforms//os:osx": "docker-desktop",
"#platforms//os:linux": "minikube",
})
}
errors with:
Error in string_dict: expected value of type 'string' for dict value element, but got select({"@platforms//os:osx": "docker-desktop", "@platforms//os:linux": "minikube"}) (select)
How do I go about defining DEFAULT_CLUSTER_ALIASES based on the host os?
Judging from https://github.com/bazelbuild/bazel/issues/2045, selecting based on host os is not possible.
When you create a rule or macro, it is evaluated during the loading phase, before command-line flags are evaluated. Bazel needs to know the default value in your build rule helm_action during the loading phase but can't because it hasn't parsed the command line and analysed the build graph.
The command line is parsed and select statements are evaluated during the analysis phase. As a broad rule, if your select statement isn't in a BUILD.bazel file (or expanded into one by a macro), it's not going to work. So the easiest way to achieve what you are after is to create a macro that wraps your rule and injects the default, e.g.
# helm_action.bzl
# Add an '_' prefix to your rule to make the rule private.
_helm_action = rule(
attrs = {
…
"cluster_aliases": attr.string_dict(
doc = "key value pair matching for creating a cluster alias where the name used to evoke a cluster alias is different than the actual cluster's name",
# Remove default attribute.
),
…
},
…
)
# Wrap your rule in a publicly exported macro.
def helm_action(**kwargs):
_helm_action(
# Instantiate your rule with the select, unless the caller
# passed its own cluster_aliases.
cluster_aliases = kwargs.pop("cluster_aliases", DEFAULT_CLUSTER_ALIASES),
**kwargs
)
It's important to note the difference between a macro and a rule. A macro is a way of generating a set of targets using other build rules, and actually expands out to be roughly equivalent to its contents when used in a BUILD file. You can check this by querying a target with the --output build flag, e.g.
load(":helm_action.bzl", "helm_action")
helm_action(
name = "foo",
# ...
)
You can query the output using the command:
bazel query //:foo --output build
This will demonstrate that the select statement is being copied into the BUILD file.
A good example of this approach is in the rules_docker repository.
EDIT: The question was clarified, so I've got an updated answer below but will keep the above answer in case it is useful to others.
A simple way of achieving what you are after is to use Bazel's toolchain API. This is a very flexible API and is what most language rulesets use in Bazel, e.g.
Create a build file with your toolchains:
# //helm:BUILD.bazel
load(":helm_toolchains.bzl", "helm_toolchain")
toolchain_type(name = "toolchain_type")
helm_toolchain(
name = "osx",
cluster_aliases = {
"local": "docker-desktop",
},
)
toolchain(
name = "osx_toolchain",
toolchain = ":osx",
toolchain_type = ":toolchain_type",
exec_compatible_with = ["@platforms//os:macos"],
# Optionally use to restrict target platforms too.
# target_compatible_with = []
)
helm_toolchain(
name = "linux",
cluster_aliases = {
"local": "minikube",
},
)
toolchain(
name = "linux_toolchain",
toolchain = ":linux",
toolchain_type = ":toolchain_type",
exec_compatible_with = ["@platforms//os:linux"],
)
Register your toolchains so that Bazel knows what to look for:
# //:WORKSPACE
# the rest of your workspace...
register_toolchains("//helm:all")
# You may need to register your execution platforms too...
# register_execution_platforms("//your_platforms/...")
Implement the toolchain backend:
# //helm:helm_toolchains.bzl
HelmToolchainInfo = provider(fields = ["cluster_aliases"])
def _helm_toolchain_impl(ctx):
toolchain_info = platform_common.ToolchainInfo(
helm_toolchain_info = HelmToolchainInfo(
cluster_aliases = ctx.attr.cluster_aliases,
),
)
return [toolchain_info]
helm_toolchain = rule(
implementation = _helm_toolchain_impl,
attrs = {
"cluster_aliases": attr.string_dict(),
},
)
Update helm_action to use toolchains. e.g.
def _helm_action_impl(ctx):
cluster_aliases = ctx.toolchains["@your_repo//helm:toolchain_type"].helm_toolchain_info.cluster_aliases
#...
helm_action = rule(
_helm_action_impl,
attrs = {
#…
},
toolchains = ["@your_repo//helm:toolchain_type"]
)
I'm working on a problem in which I only want to create a particular rule if a certain Bazel config has been specified (via '--config'). We have been using Bazel since 0.11 and have a bunch of build infrastructure that works around former limitations in Bazel. I am incrementally porting us up to newer versions. One of the features that was missing was compiler transitions, and so we rolled our own using configs and some external scripts.
My first attempt at solving my problem looks like this:
load("#rules_cc//cc:defs.bzl", "cc_library")
# use this with a select to pick targets to include/exclude based on config
# see build_if_config for an example
def noop_impl(ctx):
pass
noop = rule(
implementation = noop_impl,
attrs = {
"deps": attr.label_list(),
},
)
def __sanitize(config):
if len(config) > 2 and config[:2] == "//":
config = config[2:]
return config.replace(":", "_").replace("/", "_")
def build_if_config(**kwargs):
config = kwargs['config']
kwargs.pop('config')
name = kwargs['name'] + '_' + __sanitize(config)
binary_target_name = kwargs['name']
kwargs['name'] = binary_target_name
cc_library(**kwargs)
noop(
name = name,
deps = select({
config: [ binary_target_name ],
"//conditions:default": [],
})
)
This almost gets me there, but the problem is that if I want to build a library as an output, then it becomes an intermediate dependency, and therefore gets deleted or never built.
For example, if I do this:
build_if_config(
name="some_lib",
srcs=[ "foo.c" ],
config="//:my_config",
)
and then I run
bazel build --config my_config //:some_lib
Then libsome_lib.a does not make it to bazel-out, although if I define the target directly with cc_library, then it does.
Is there a way that I can just create the appropriate rule directly in the macro instead of creating a noop rule and using a select? Or another mechanism?
Thanks in advance for your help!
As I noted in my comment, I was misunderstanding how Bazel figures out its dependencies. The "create a file" section of the Rules Tutorial explains some of the details, and I followed along with it for some of my solution.
Basically, the problem was not that the built files were not sticking around; it was that they were never getting built. Bazel did not know to look in the deps variable and build those things: it seems I had to create an action that uses the deps, and then register that output by returning a (list containing a) DefaultInfo provider.
Below is my new noop_impl function
def noop_impl(ctx):
if len(ctx.attr.deps) == 0:
return None
# ctx.attr has the attributes of this rule
dep = ctx.attr.deps[0]
# DefaultInfo is apparently some sort of globally available
# class that can be used to index Target objects
infile = dep[DefaultInfo].files.to_list()[0]
outfile = ctx.actions.declare_file('lib' + ctx.label.name + '.a')
ctx.actions.run_shell(
inputs = [infile],
outputs = [outfile],
command = "cp %s %s" % (infile.path, outfile.path),
)
# we can also instantiate a DefaultInfo to indicate what output
# we provide
return [DefaultInfo(files = depset([outfile]))]
I want to create a Groovy function in my Jenkins job that looks into a folder and deletes all files that are older than X days.
So I started looking on the internet and found different kinds of solutions.
At first I created a .groovy file with Visual Studio Code on my local PC to understand how it works. That is why my code does not look like the code from the internet: I changed it so that I could understand how it works.
def deleteFilesOlderThanDays(int daysBack, String path) {
def DAY_IN_MILLIS = 24 * 60 * 60 * 1000
File directory = new File(path)
if(directory.exists()){
File[] listFiles = directory.listFiles()
for(File listFile : listFiles) {
def days_from_now = ( (System.currentTimeMillis() - listFile.lastModified()) /(DAY_IN_MILLIS))
if(days_from_now > daysBack) {
println('------------')
println('file is older')
println(listFile)
}
else{
println('------------')
println('File is not older')
println(listFile)
}
}//End: for(File listFile : listFiles) {
}//End: if(directory.exists()){
}
(I know the code does not delete anything; it is only for my understanding.)
The second step was to include this newly created function in my Jenkins Groovy file, and that is where I got stuck.
The problem is that, at the beginning of the code, I do not get a positive result for whether the folder really exists.
The line:
if(directory.exists()){
causes me a lot of problems, and it is not clear to me why.
I have tried so many different versions, but I haven't found a solution.
I have also used the "Pipeline Syntax" example [Sample Step fileExists], but it doesn't help me either.
I have included:
import java.io.File
at the beginning of my file.
I have a base file which I include in the Jenkins job. This file includes my library files; one of these library files is file.groovy. In the base Jenkins file I execute the function file.deleteFilesOlderThanDays() (for testing I do not use any parameters).
The code from my function for testing is:
def deleteFilesOlderThanDays() {
dir = '.\\ABC'
echo "1. ----------------------------------------"
File directory1 = new File('.\\ABC\\')
exist = directory1.exists()
echo 'Directory1 name is = '+directory1
echo 'exist value is = '+exist
echo "2. ----------------------------------------"
File directory2 = new File('.\\ABC')
exist = directory2.exists()
echo 'Directory2 name is = '+directory2
echo 'exist value is = '+exist
echo "3. ----------------------------------------"
File directory3 = new File(dir)
exist = directory3.exists()
echo 'Directory3 name is = '+directory3
echo 'exist value is = '+exist
echo "4. Pipeline Syntax ------------------------"
exist = fileExists '.\\ABC'
echo 'exist value is = '+exist
echo "5. ----------------------------------------"
File directory5 = new File(dir)
echo 'Directory5 name is = '+directory5
// execute an error
// exist = fileExists(directory5)
exist = fileExists "directory5"
echo 'exist value is = '+exist
echo "6. ----------------------------------------"
exist = fileExists(dir)
echo 'exist value is = '+exist
File[] listFiles = directory5.listFiles()
echo 'List file = '+listFiles
}
And the output in the Jenkins console is (I cleaned it up a little bit):
1. ----------------------------------------
Directory1 name is = .\ABC\
exist value is = false
2. ----------------------------------------
Directory2 name is = .\ABC
exist value is = false
3. ----------------------------------------
Directory3 name is = .\ABC
exist value is = false
4. Pipeline Syntax ------------------------
exist value is = true
5. ----------------------------------------
Directory5 name is = .\ABC
exist value is = false
6. ----------------------------------------
exist value is = true
List file = null
I only get a true value in steps 4 and 6, so I can be sure that the folder really exists.
So it seems to me that the command:
File directory = new File(dir)
does not work correctly in my case.
I can't create a listFiles variable because the directory is not initialized correctly.
It is also not clear to me which kind of commands I should use. The Groovy examples always use functions like:
.exists()
But in the Jenkins examples I always find code like this:
fileExists()
Why are there differences between Groovy and Jenkins Groovy style? Shouldn't they be the same?
Does anyone have an idea for me, or can anyone tell me what I'm doing wrong?
You may benefit from this answer from a similar question:
"
java.io.File methods will refer to files on the master where Jenkins is running, so not in the current workspace on the slave machine.
To refer to files on the slave machine, you should use the readFile method
"
def dir = readFile("${WORKSPACE}/ABC");
Link to original answer
Thanks for all that feedback.
OK, it is now clear to me that Jenkins Groovy != plain Groovy.
I have read a lot about there being different commands depending on whether you are executing a file search on the Jenkins master or on a Jenkins slave.
The suggestion from Youg to continue only after confirming that the folder exists helped me.
I had problems deleting the files, so in the end I used a primitive batch command to get my function running.
The final function now looks like this:
def deleteFilesOlderThanXDays(daysBack, path) {
def DAY_IN_MILLIS = 24 * 60 * 60 * 1000
if(fileExists(path)){
// change into path
dir(path) {
// find all kind of files
files = findFiles(glob: '*.*')
for (int i = 0; i < files.length; i++) {
def days_from_now = ( (System.currentTimeMillis() - files[i].lastModified) /(DAY_IN_MILLIS))
if(days_from_now > daysBack) {
echo('file : >>'+files[i].name+'<< is older than '+daysBack+' days')
bat('del /F /Q "'+files[i].name+'"')
}
else{
echo('file : >>'+files[i].name+'<< is not older than '+daysBack+' days')
}
}// End: for (int i = 0; i < files.length; i++) {
}// End: dir(path) {
}// End: if(fileExists(path)){
}
Thanks for helping and best regards,
You can add the script below to list the files and folders in the current working directory, so that you can confirm whether the folder ABC exists or not.
After you confirm that the ABC folder exists, dig into the rest of the code.
def deleteFilesOlderThanDays() {
// print current work directory
pwd
// if jenkins job run on window machine
bat 'dir'
// if jenkins job run on linux machine
sh 'ls -l'
dir = '.\\ABC'
echo "1. ----------------------------------------"
.....
For fileExists usage, I think the correct way is as follows:
fileExists './ABC'
def dir = './ABC'
fileExists dir
You should use / as the path separator rather than \, according to its documentation.
The Bazel build flag --workspace_status_command supports calling a script to retrieve e.g. repository metadata; this is also known as build stamping and is available in rules like java_binary.
I'd like to create a custom rule using this metadata.
I want to use this for a common support function. It should receive the git version and some other attributes and create a version.go output file usable as a dependency.
So I started a journey looking at rules in various bazel repositories.
Rules like rules_docker support stamping with stamp in container_image and let you reference the status output in attributes.
rules_go supports it in the x_defs attribute of go_binary.
This would be ideal for my purpose and I dug in...
It looks like I can get what I want with ctx.actions.expand_template using the entries in ctx.info_file or ctx.version_file as a dictionary for substitutions. But I didn't figure out how to get a dictionary out of those files. And those two files seem to be "unofficial"; they are not part of the ctx documentation.
Building on what I found out already: How do I get a dict based on the status command output?
If that's not possible, what is the shortest/simplest way to access workspace_status_command output from custom rules?
I've been exactly where you are and I ended up following the path you've started exploring. I generate a JSON description that also includes information collected from git, to package with the result, and I ended up doing something like this:
def _build_mft_impl(ctx):
args = ctx.actions.args()
args.add('-f')
args.add(ctx.info_file)
args.add('-i')
args.add(ctx.files.src)
args.add('-o')
args.add(ctx.outputs.out)
ctx.actions.run(
outputs = [ctx.outputs.out],
inputs = ctx.files.src + [ctx.info_file],
arguments = [args],
progress_message = "Generating manifest: " + ctx.label.name,
executable = ctx.executable._expand_template,
)
def _get_mft_outputs(src):
return {"out": src.name[:-len(".tmpl")]}
build_manifest = rule(
implementation = _build_mft_impl,
attrs = {
"src": attr.label(mandatory=True,
allow_single_file=[".json.tmpl", ".json_tmpl"]),
"_expand_template": attr.label(default=Label("//:expand_template"),
executable=True,
cfg="host"),
},
outputs = _get_mft_outputs,
)
//:expand_template is a label in my case pointing to a py_binary performing the transformation itself. I'd be happy to learn about a better (more native, fewer hops) way of doing this, but (for now) I went with: it works. A few comments on the approach and your concerns:
AFAIK you cannot read in the file and perform operations on it in Skylark itself...
...speaking of which, it's probably not a bad thing to keep the transformation (tool) and the build description (Bazel) separate anyway.
It could be debated what constitutes the official documentation, but while ctx.info_file may not appear in the reference manual, it is documented in the source tree. :) Which is the case for other areas as well (and I hope that is not because those interfaces are considered not committed to yet).
For the sake of completeness, in src/main/java/com/google/devtools/build/lib/skylarkbuildapi/SkylarkRuleContextApi.java there is:
@SkylarkCallable(
name = "info_file",
structField = true,
documented = false,
doc =
"Returns the file that is used to hold the non-volatile workspace status for the "
+ "current build request."
)
public FileApi getStableWorkspaceStatus() throws InterruptedException, EvalException;
EDIT: a few extra details, as asked for in the comment.
In my workspace_status.sh I would have for instance the following line:
echo STABLE_GIT_REF $(git log -1 --pretty=format:%H)
In my .json.tmpl file I would then have:
"ref": "${STABLE_GIT_REF}",
I've opted for shell-like notation for the text to be replaced, since it's intuitive for many users as well as easy to match.
As for the replacement, relevant (CLI kept out of this) portion of the actual code would be:
import re

def get_map(val_file):
"""
Return dictionary of key/value pairs from ``val_file``.
"""
value_map = {}
for line in val_file:
(key, value) = line.split(' ', 1)
value_map.update(((key, value.rstrip('\n')),))
return value_map
def expand_template(val_file, in_file, out_file):
"""
Read each line from ``in_file`` and write it to ``out_file`` replacing all
${KEY} references with values from ``val_file``.
"""
def _substitute_variable(mobj):
return value_map[mobj.group('var')]
re_pat = re.compile(r'\${(?P<var>[^} ]+)}')
value_map = get_map(val_file)
for line in in_file:
out_file.write(re_pat.subn(_substitute_variable, line)[0])
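For illustration, here is one way the two helpers above could be wired to the -f/-i/-o flags that the rule passes to the tool. This is only a hypothetical sketch of the command-line part the author left out, not the actual script:

# Hypothetical CLI wrapper around get_map()/expand_template() defined above.
import argparse

def main():
    parser = argparse.ArgumentParser(
        description="Expand ${KEY} references using a workspace status file.")
    parser.add_argument('-f', dest='val_file', required=True,
                        help='workspace status file (ctx.info_file)')
    parser.add_argument('-i', dest='in_file', required=True,
                        help='input .json.tmpl file')
    parser.add_argument('-o', dest='out_file', required=True,
                        help='expanded output file')
    args = parser.parse_args()
    # Open the three files and delegate to the helpers shown above.
    with open(args.val_file) as val_file, \
         open(args.in_file) as in_file, \
         open(args.out_file, 'w') as out_file:
        expand_template(val_file, in_file, out_file)

if __name__ == '__main__':
    main()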
EDIT2: This is how I expose the Python script to the rest of Bazel:
py_binary(
name = "expand_template",
main = "expand_template.py",
srcs = ["expand_template.py"],
visibility = ["//visibility:public"],
)
Building on Ondrej's answer, I now use something like this (adapted in the SO editor, might contain small errors):
tools/bazel.rc:
build --workspace_status_command=tools/workspace_status.sh
tools/workspace_status.sh:
echo STABLE_GIT_REF $(git rev-parse HEAD)
version.bzl:
_VERSION_TEMPLATE_SH = """
set -e -u -o pipefail
while read line; do
export "${line% *}"="${line#* }"
done <"$INFILE" \
&& cat <<EOF >"$OUTFILE"
{ "ref": "${STABLE_GIT_REF}"
, "service": "${SERVICE_NAME}"
}
EOF
"""
def _commit_info_impl(ctx):
ctx.actions.run_shell(
outputs = [ctx.outputs.outfile],
inputs = [ctx.info_file],
progress_message = "Generating version file: " + ctx.label.name,
command = _VERSION_TEMPLATE_SH,
env = {
'INFILE': ctx.info_file.path,
'OUTFILE': ctx.outputs.outfile.path,
'SERVICE_NAME': ctx.attr.service,
},
)
commit_info = rule(
implementation = _commit_info_impl,
attrs = {
'service': attr.string(
mandatory = True,
doc = 'name of versioned service',
),
},
outputs = {
'outfile': 'manifest.json',
},
)
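A target using this rule could then look roughly like the following. This is a hypothetical usage sketch; the load path, target name, and service value are just examples:
# BUILD.bazel (hypothetical usage of the rule above)
load("//:version.bzl", "commit_info")

commit_info(
    name = "version_manifest",
    service = "my-service",
)
Building //:version_manifest should then produce manifest.json, expanded from the stable workspace status file.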
I am working on an Ant script to build Java projects developed with IBM RAD 7.5.
The Ant script is calling the IBM RAD Ant extension API. I am using a task to load the project set file (*.psf) into memory, and calling another task to compile the projects listed in the projectSetImport.
The problem is that the projects listed in the psf file are not ordered by project dependency, so when it compiles, it fails because the dependency order is incorrect.
Is there any API or method to manage the dependencies automatically? The psf files I am handling are quite big, with 200+ projects in each file, and they are constantly changing (e.g. some projects get removed and some new projects are added each week).
Here is a detailed description of the question:
The project dependency is like:
1) project A depends on B and D.
2) project B depends on C
3) project E depends on F
A -> B -> C
A -> D
E -> F
The sample.psf file just lists all projects:
A
B
C
D
E
F
Load sample.psf, which gives a project list [A, B, C, D, E, F].
Build the project list from it.
The build fails at A, because A needs B and D to be built first.
My current solution is to rebuild the sample.psf manually, e.g.
sample.psf file:
C
B
D
A
F
E
but this is hard to maintain, because there are 200+ projects in a psf file and they are constantly changing.
One way to attack this issue is to write a parser to read the .project file for each project; the dependency projects are listed in the "projects" tag. Then implement a directed acyclic graph algorithm to reorder the dependencies. This approach might be overkill. This must be a common issue for teams building IBM Java projects; is there a solution?
Finally, I wrote some Python code to compute the dependencies. The logic is listed below:
1) Read the psf file into a list; the psf file is an XML file, and the project names are in tags.
2) For each project in the list, go to the project source code and read the .project file and the .classpath file; these two files contain the dependency projects. For the .project file (XML), fetch the project names from the tags; for the .classpath file, fetch the entries with the attribute kind='src'.
3) Now you have [source] -> [dependent_project_list]; implement a directed acyclic graph (see the attached code).
4) Load the [source] -> [dependent_project] pairs into the AdjecentListDigraph, and call topoSort() to return the dependency order.
5) Generate a new ordered psf file.
/////////////////////// dap_graph.py/////////////////////////////
# -*- coding: utf-8 -*-
'''Use directed acyclic path to calculate the dependency'''
class Vertex:
def __init__(self, name):
self._name = name
self.visited = True
class InValidDigraphError(RuntimeError):
def __init__(self, arg):
self.args = arg
class AdjecentListDigraph:
'''represent a directed graph by adjacent list'''
def __init__(self):
'''use a table to store edges,
the key is the vertex name, value is vertex list
'''
self._edge_table = {}
self._vertex_name_set = set()
def __addVertex(self, vertex_name):
self._vertex_name_set.add(vertex_name)
def addEdge(self, start_vertex, end_vertex):
if not self._edge_table.has_key(start_vertex._name):
self._edge_table[start_vertex._name] = []
self._edge_table[start_vertex._name].append(end_vertex)
# populate vertex set
self.__addVertex(start_vertex._name)
self.__addVertex(end_vertex._name)
def getNextLeaf(self, vertex_name_set, edge_table):
'''pick up a vertex which has no end vertex. return vertex.name.
algorithm:
for v in vertex_set:
get vertexes not in edge_table.keys()
then get vertex whose end_vertex is empty
'''
print 'TODO: validate this is a connected tree'
leaf_set = vertex_name_set - set(edge_table.keys())
if len(leaf_set) == 0:
if len(edge_table) > 0:
raise InValidDigraphError("Error: Cyclic directed graph")
else:
vertex_name = leaf_set.pop()
vertex_name_set.remove(vertex_name)
# remove any occurrence of vertext_name in edge_table
for key, vertex_list in edge_table.items():
if vertex_name in vertex_list:
vertex_list.remove(vertex_name)
# remove the vertex who has no end vertex from edge_table
if len(vertex_list) == 0:
del edge_table[key]
return vertex_name
def topoSort(self):
'''topological sort, return list of vertex. Throw error if it is
a cyclic graph'''
sorted_vertex = []
edge_table = self.dumpEdges()
vertex_name_set = set(self.dumpVertexes())
while len(vertex_name_set) > 0:
next_vertex = self.getNextLeaf(vertex_name_set, edge_table)
sorted_vertex.append(next_vertex)
return sorted_vertex
def dumpEdges(self):
'''return the _edge_list for debugging'''
edge_table = {}
for key in self._edge_table:
if not edge_table.has_key(key):
edge_table[key] = []
edge_table[key] = [v._name for v in self._edge_table[key]]
return edge_table
def dumpVertexes(self):
return self._vertex_name_set
//////////////////////projects_loader.py///////////////////////
# -*- coding: utf-8 -*-
'''
This module will load dependencies from every projects from psf, and compute
the directed acyclic path.
Dependencies are loaded into a map structured as below:
dependency_map{"project_A":set(A1,A2,A3),
"A1:set(B1,B2,B3)}
The algorithm is:
1) read the project list from the psf file
2) call readProjectDependency(project_name)
'''
import os, xml.dom.minidom
from utils.setting import configuration
class ProjectsLoader:
def __init__(self, application_name):
self.dependency_map = {}
self.source_dir = configuration.get('Build', 'base.dir')
self.application_name = application_name
self.src_filter_list = configuration.getCollection('psf',\
'src.filter.list')
def loadDependenciesFromProjects(self, project_list):
for project_name in project_list:
self.readProjectDependency(project_name)
def readProjectDependency(self, project_name):
project_path = self.source_dir + '\\' + self.application_name + '\\'\
+ project_name
project_file_path = os.path.join(project_path,'.project')
projects_from_project_file = self.readProjectFile(project_file_path)
classpath_file_path = os.path.join(project_path,'.classpath')
projects_from_classpath_file = self.\
readClasspathFile(classpath_file_path)
projects = (projects_from_project_file | projects_from_classpath_file)
if self.dependency_map.has_key(project_name):
self.dependency_map[project_name] |= projects
else:
self.dependency_map[project_name] = projects
def loadDependencyByProjectName(self, project_name):
project_path = self.source_dir + '\\' + self.application_name + '\\'\
+ project_name
project_file_path = os.path.join(project_path,'.project')
projects_from_project_file = self.readProjectFile(project_file_path)
classpath_file_path = os.path.join(project_path,'.classpath')
projects_from_classpath_file = self.\
readClasspathFile(classpath_file_path)
projects = list(set(projects_from_project_file\
+ projects_from_classpath_file))
self.dependency_map[project_name] = projects
for project in projects:
self.loadDependencyByProjectName(project)
def readProjectFile(self, project_file_path):
DOMTree = xml.dom.minidom.parse(project_file_path)
projects = DOMTree.documentElement.getElementsByTagName('project')
return set([project.childNodes[0].data for project in projects])
def readClasspathFile(self, classpath_file_path):
dependency_projects = set([])
if os.path.isfile(classpath_file_path):
DOMTree = xml.dom.minidom.parse(classpath_file_path)
projects = DOMTree.documentElement.\
getElementsByTagName('classpathentry')
for project in projects:
if project.hasAttribute('kind') and project.getAttribute\
('kind') == 'src' and project.hasAttribute('path') and \
project.getAttribute('path') not in self.src_filter_list:
project_name = project.getAttribute('path').lstrip('/')
dependency_projects.add(project_name)
return dependency_projects
def getDependencyMap(self):
return self.dependency_map
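For completeness, a small driver along the following lines could tie the two modules together. This is only a sketch under the assumptions above: it reuses the author's configuration setup through ProjectsLoader, the reading and writing of the psf file itself is omitted, and projects that neither have dependencies nor are depended upon would have to be appended to the result separately.
# order_projects.py -- hypothetical driver, not part of the original modules
from dap_graph import AdjecentListDigraph, Vertex
from projects_loader import ProjectsLoader

def order_projects(application_name, project_list):
    # Read the .project/.classpath dependencies of every project in the psf.
    loader = ProjectsLoader(application_name)
    loader.loadDependenciesFromProjects(project_list)

    # An edge "project -> dependency" means that project depends on dependency.
    graph = AdjecentListDigraph()
    for project, dependencies in loader.getDependencyMap().items():
        for dependency in dependencies:
            graph.addEdge(Vertex(project), Vertex(dependency))

    # topoSort() lists dependencies before the projects that need them,
    # which is the order the regenerated psf file should use.
    return graph.topoSort()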