import java.io.*;
import com.hp.hpl.jena.query.*;     // Query, QueryFactory, QueryExecution, ResultSet, ... (provided by ARQ)
import com.hp.hpl.jena.rdf.model.*; // Model, ModelFactory
class Firstsparql {
public static void main(String[] args) throws IOException {
// Open the bloggers RDF graph from the filesystem
InputStream in = new FileInputStream(new File("foaf.rdf"));
// Create an empty in-memory model and populate it from the graph
Model model = ModelFactory.createMemModelMaker().createModel();
model.read(in,null); // null base URI, since model URIs are absolute
in.close();
// Create a new query
String queryString =
"PREFIX foaf: <http://xmlns.com/foaf/0.1/> " +
"SELECT ?url " +
"WHERE {" +
" ?contributor foaf:name \"Jon Foobar\" . " +
" ?contributor foaf:weblog ?url . " +
" }";
Query query = QueryFactory.create(queryString);
// Execute the query and obtain results
QueryExecution qe = QueryExecutionFactory.create(query, model);
ResultSet results = qe.execSelect();
// Output query results
ResultSetFormatter.out(System.out, results, query);
// Important – free up resources used running the query
qe.close();
}
}
Hi, I have included the jena.jar file in my classpath, but some of the classes and methods are still not recognised. I assume they are not defined in the Jena API. Is there any other API I need to include? Please let me know, thanks.
Your CLASSPATH needs to include all of the .jar files in the Jena lib directory, not just jena.jar.
Apart from the advice above, you can read about Jena's architecture at https://jena.apache.org/documentation
In particular, the part of Jena that processes SPARQL queries is called ARQ; the ARQ jar is the one that contains QueryFactory.
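For reference (assuming the legacy Jena 2.x package layout used in the code above), the query classes come from the com.hp.hpl.jena.query package, which ships in the ARQ jar, so both the jar on the classpath and these imports need to be present:
import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.query.ResultSetFormatter;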
Following the Snowflake SnowPark tutorial here:
https://quickstarts.snowflake.com/guide/getting_started_with_snowpark/index.html?index=..%2f..index&msclkid=f0b56761cf1011ecb976c58c0f8e2a64#9
It goes through how to use Snowpark to create a user-defined function (UDF), upload data programmatically, and then execute the UDF code against the data in Snowflake.
I am able to connect to Snowflake and execute the tutorial Scala code from the command line, but when I try to create the stored procedure in step 10 I get an error message: Package 'com.snowflake:snowpark:latest' is not supported.
Does anybody know how to resolve this so that I can create the stored procedure with my scala code?
I am using standard Snowflake on AWS and executing in a worksheet under the new interface. The connection settings mirror the programmatic settings that work for user, role, warehouse, database, and schema.
Here is the SQL code:
create or replace procedure discoverHappyTweets()
returns string
language scala
runtime_version=2.12
packages=('com.snowflake:snowpark:latest')
imports=('@snowpark_demo_udf_dependency_jars/ejml-0.23.jar','@snowpark_demo_udf_dependency_jars/slf4j-api.jar','@snowpark_demo_udf_dependency_jars/stanford-corenlp-3.6.0-models.jar','@snowpark_demo_udf_dependency_jars/stanford-corenlp-3.6.0.jar')
handler = 'UDFDemo.discoverHappyTweets'
target_path = '@snowpark_demo_udf_dependency_jars/discoverHappyTweets.jar'
as
$$
import com.snowflake.snowpark._
import com.snowflake.snowpark.functions._
import com.snowflake.snowpark.SaveMode.Overwrite
import com.snowflake.snowpark.types.{StringType, StructField, StructType}
import java.util.Properties
import edu.stanford.nlp.ling.CoreAnnotations
import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations
import edu.stanford.nlp.pipeline.StanfordCoreNLP
import edu.stanford.nlp.sentiment.SentimentCoreAnnotations
// import org.apache.log4j.{Level, Logger}
/**
* Demonstrates how to use Snowpark to create user-defined functions (UDFs)
* in Scala.
*
* Before running the main method of this class, download the data and JAR files
* needed for the demo, then run the main method of the UDFDemoSetup class
* to upload those files to internal stages.
*/
object UDFDemo {
// The name of the internal stage for the demo data.
val dataStageName = "snowpark_demo_data"
// The name of the internal stage for the JAR files needed by the UDF.
val jarStageName = "snowpark_demo_udf_dependency_jars"
// The name of the file containing the dataset.
val dataFilePattern = "training.1600000.processed.noemoticon.csv"
/*
* Reads tweet data from the demo CSV file from a Snowflake stage and
* returns the data in a Snowpark DataFrame for analysis.
*/
def collectTweetData(session: Session): DataFrame = {
// Import names from the implicits object, which allows you to use shorthand
// to refer to columns in a DataFrame (e.g. `'columnName` and `$"columnName"`).
import session.implicits._
Console.println("\n=== Setting up the DataFrame for the data in the stage ===\n")
// Define the schema for the CSV file containing the demo data.
val schema = Seq(
StructField("target", StringType),
StructField("ids", StringType),
StructField("date", StringType),
StructField("flag", StringType),
StructField("user", StringType),
StructField("text", StringType),
)
// Read data from the demo file in the stage into a Snowpark DataFrame.
// dataStageName is the name of the stage that was created
// when you ran UDFDemoSetup earlier, and dataFilePattern is
// the pattern matching the files that were uploaded to that stage.
val origData = session
.read
.schema(StructType(schema))
.option("compression", "gzip")
.csv(s"#$dataStageName/$dataFilePattern")
// Drop all of the columns except the column containing the text of the tweet
// and return the first 100 rows.
val tweetData = origData.drop('target, 'ids, 'date, 'flag, 'user).limit(100)
Console.println("\n=== Retrieving the data and printing the text of the first 10 tweets")
// Display some of the data.
tweetData.show()
// Return the tweet data for sentiment analysis.
return tweetData
}
/*
* Determines the sentiment of the words in a string of text by using the
* Stanford NLP API (https://nlp.stanford.edu/nlp/javadoc/javanlp/).
*/
def analyze(text: String): Int = {
val props = new Properties()
props.setProperty("annotators", "tokenize, ssplit, pos, parse, sentiment")
lazy val pipeline = new StanfordCoreNLP(props)
lazy val annotation = pipeline.process(text)
annotation.get(classOf[CoreAnnotations.SentencesAnnotation]).forEach(sentence => {
lazy val tree = sentence.get(classOf[SentimentCoreAnnotations.SentimentAnnotatedTree])
return RNNCoreAnnotations.getPredictedClass(tree)
})
0
}
/*
* Creates a user-defined function (UDF) for sentiment analysis. This function
* registers the analyze function as a UDF, along with its dependency JAR files.
*/
def createUDF(session: Session): UserDefinedFunction = {
Console.println("\n=== Adding dependencies for your UDF ===\n")
// Register CoreNLP library JAR files as dependencies to support
// the UDF. The JAR files are already in the Snowflake stage named by
// jarStageName. The stage was created and JARs were uploaded when you ran
// the code in UDFDemoSetup.scala.
session.addDependency(s"#$jarStageName/stanford-corenlp-3.6.0.jar.gz")
session.addDependency(s"#$jarStageName/stanford-corenlp-3.6.0-models.jar.gz")
session.addDependency(s"#$jarStageName/slf4j-api.jar.gz")
session.addDependency(s"#$jarStageName/ejml-0.23.jar.gz")
Console.println("\n=== Creating the UDF ===\n")
// Register the analyze function as a UDF that analyzes the sentiment of
// text. Each value in the column that you pass to the UDF is passed to the
// analyze method.
val sentimentFunc = udf(analyze(_))
return sentimentFunc
}
/*
* Analyzes tweet data, discovering tweets with a happy sentiment and saving
* those tweets to a table in the database.
*/
def processHappyTweets(session: Session, sentimentFunc: UserDefinedFunction, tweetData: DataFrame): Unit = {
// Import names from the `implicits` object so you can use shorthand to refer
// to columns in a DataFrame (for example, `'columnName` and `$"columnName"`).
import session.implicits._
Console.println("\n=== Creating a transformed DataFrame that contains the results from calling the UDF ===\n")
// Call the UDF on the column that contains the content of the tweets.
// Create and return a new `DataFrame` that contains a "sentiment" column.
// This column contains the sentiment value returned by the UDF for the text
// in each row.
val analyzed = tweetData.withColumn("sentiment", sentimentFunc('text))
Console.println("\n=== Creating a transformed DataFrame with just the happy sentiments ===\n")
// Create a new DataFrame that contains only the tweets with happy sentiments.
val happyTweets = analyzed.filter('sentiment === 3)
Console.println("\n=== Retrieving the data and printing the first 10 tweets ===\n")
// Display the first 10 tweets with happy sentiments.
happyTweets.show()
Console.println("\n=== Saving the data to the table demo_happy_tweets ===\n")
// Write the happy tweet data to the table.
happyTweets.write.mode(Overwrite).saveAsTable("demo_happy_tweets")
}
/*
* Reads tweet data from a demo CSV, creates a UDF, then uses the UDF to
* discover the sentiment of tweet text.
*/
def discoverHappyTweets(session: Session): String = {
// Collect tweet data from the demo CSV.
val tweetData = collectTweetData(session)
// Register a user-defined function for determining tweet sentiment.
val sentimentFunc = createUDF(session)
// Analyze tweets to discover those with a happy sentiment.
val happyTweets = processHappyTweets(session, sentimentFunc, tweetData)
"Complete"
}
}
$$;
We have a Beam pipeline written in Java that we run on GCP Dataflow. It's very simple: it takes a SQL query as a PipelineOption, issues that SQL query against BigQuery, and for every row in the returned dataset constructs a message and puts it onto a Pub/Sub topic.
import com.google.api.services.bigquery.model.TableRow;
import java.util.HashMap;
import java.util.Map;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.PipelineResult;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
import org.apache.beam.sdk.io.gcp.pubsub.PubsubIO;
import org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.options.Validation.Required;
import org.apache.beam.sdk.options.ValueProvider;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* The {@code BigQueryEventReplayer} pipeline runs a supplied SQL query
* against BigQuery and sends the results one-by-one to PubSub.
* The query MUST return a column named 'json'; it is this column
* (and ONLY this column) that will be sent onward. The column must be a String type
* and should be valid JSON.
*/
public class BigQueryEventReplayer {
private static final Logger logger = LoggerFactory.getLogger(BigQueryEventReplayer.class);
/**
* Options for the BigQueryEventReplayer. See descriptions for more info
*/
public interface Options extends PipelineOptions {
#Description("SQL query to be run."
+ "An SQL string literal which will be run 'as is'")
#Required
ValueProvider<String> getBigQuerySql();
void setBigQuerySql(ValueProvider<String> value);
#Description("The name of the topic which data should be published to. "
+ "The name should be in the format of projects/<project-id>/topics/<topic-name>.")
#Required
ValueProvider<String> getOutputTopic();
void setOutputTopic(ValueProvider<String> value);
#Description("The ID of the BigQuery dataset targeted by the event")
#Required
ValueProvider<String> getBigQueryTargetDataset();
void setBigQueryTargetDataset(ValueProvider<String> value);
#Description("The ID of the BigQuery table targeted by the event")
#Required
ValueProvider<String> getBigQueryTargetTable();
void setBigQueryTargetTable(ValueProvider<String> value);
#Description("The SourceSystem attribute of the event")
#Required
ValueProvider<String> getSourceSystem();
void setSourceSystem(ValueProvider<String> value);
}
/**
* Takes the data from the TableRow and prepares it for the PubSub, including
* adding attributes to ensure the payload is routed correctly.
*/
// We would rather use a SimpleFunction here but then we wouldn't be able
// to inject our value providers. So instead we hackishly make a nested class
public static class MapQueryToPubsub extends DoFn<TableRow, PubsubMessage> {
private final ValueProvider<String> targetDataset;
private final ValueProvider<String> targetTable;
private final ValueProvider<String> sourceSystem;
MapQueryToPubsub(
ValueProvider<String> targetDataset,
ValueProvider<String> targetTable,
ValueProvider<String> sourceSystem) {
this.targetDataset = targetDataset;
this.targetTable = targetTable;
this.sourceSystem = sourceSystem;
}
/**
* Entry point of DoFn for Dataflow.
*/
@ProcessElement
public void processElement(ProcessContext c) {
TableRow row = c.element();
if (!row.containsKey("json")) {
logger.warn("table does not contain column named 'json'");
}
Map<String, String> attributes = new HashMap<>();
attributes.put("sourceSystem", sourceSystem.get());
attributes.put("targetDataset", targetDataset.get());
attributes.put("targetTable", targetTable.get());
String json = (String) row.get("json");
c.output(new PubsubMessage(json.getBytes(), attributes));
}
}
/**
* Run the pipeline. This is the entrypoint for running 'locally'
*/
public static void main(String[] args) {
// Parse the user options passed from the command-line
Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
run(options);
}
/**
* Run the pipeline. This is the entrypoint that GCP will use
*/
public static PipelineResult run(Options options) {
Pipeline pipeline = Pipeline.create(options);
pipeline.apply("Read from BigQuery query",
BigQueryIO.readTableRows().fromQuery(options.getBigQuerySql()).usingStandardSql().withoutValidation()
.withTemplateCompatibility())
.apply("Map data to PubsubMessage",
ParDo.of(
new MapQueryToPubsub(
options.getBigQueryTargetDataset(),
options.getBigQueryTargetTable(),
options.getSourceSystem()
)
)
)
.apply("Write message to PubSub", PubsubIO.writeMessages().to(options.getOutputTopic()));
return pipeline.run();
}
}
The BigQuery data being queried is essentially a log of events. We have recently determined that the order in which we insert those events onto the pubsub topic is important. We can determine the correct order by using an ORDER BY in the query that we issue against BigQuery however we are skeptical as to whether that order will be respected when the data gets inserted onto the pubsub topic.
Our main concern is in this code:
pipeline.apply("Read from BigQuery query",
BigQueryIO.readTableRows().fromQuery(options.getBigQuerySql()).usingStandardSql().withoutValidation()
.withTemplateCompatibility())
that simple command manifests as this in Dataflow:
There is a lot happening in that step (shuffles etc...) and actually many of the sub-steps are themselves made up of multiple sub-steps. Moreover, one of the sub-steps is called "ReadFiles" which makes me think that perhaps Dataflow is writing the data to some sort of temporary file store. All-in-all this leads me to doubt that an ORDER BY in the supplied SQL query will be preserved when the rows get published to pubsub.
Does beam/Dataflow offer any guarantee that the ORDER BY will be preserved in this scenario or am I going to have to introduce a sort into my pipeline to guarantee that the desired order is adhered to?
The BigQueryIO read basically consists of an export job to GCS as Avro for the query/table and then a read from those files (and some more stuff). So it won't preserve the order, since the reads are done in parallel and there will be multiple threads reading chunks of the created file(s).
Generally speaking, distributed processing systems such as Dataflow (or Spark, etc.) don't preserve order and are bad at ordering things, given the parallel nature of their work. Bear in mind that to sort elements you need to hold everything in a single worker.
In fact, even in BigQuery, ORDER BY is quite a demanding operation.
It's hard to find workarounds for this, since these systems are not built for this type of task. I can think of adding a ROW_NUMBER, using that as the timestamp and adding a window, but this is quite use-case specific.
Also, PubSubIO won't preserve the order when publishing.
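If re-ordering by the subscriber is an option, here is a minimal sketch (not something Beam or Pub/Sub guarantees; it assumes the supplied SQL adds a hypothetical ROW_NUMBER() OVER (ORDER BY ...) AS seq column alongside json): carry the sequence number as a message attribute and let the consumer sort on it.
// Sketch: variant of MapQueryToPubsub.processElement that propagates a 'seq' column
// (hypothetical name, produced by ROW_NUMBER() in the supplied query) as an attribute.
@ProcessElement
public void processElement(ProcessContext c) {
  TableRow row = c.element();
  Map<String, String> attributes = new HashMap<>();
  attributes.put("sourceSystem", sourceSystem.get());
  attributes.put("targetDataset", targetDataset.get());
  attributes.put("targetTable", targetTable.get());
  // Neither Dataflow nor Pub/Sub preserves ordering end-to-end, so the subscriber
  // has to re-order messages using this attribute.
  attributes.put("seq", String.valueOf(row.get("seq")));
  String json = (String) row.get("json");
  c.output(new PubsubMessage(json.getBytes(), attributes));
}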
I need to find the shortest paths between nodes, but with some restrictions on the relation types allowed in good paths.
I have two relation types: A & B.
A path is considered bad if it has two or more consecutive relations of type B:
Good path: ()-A->()-A->()<-A-()-B->()-A->()-B->()
Bad path: ()-A->()-A->()<-A-()-B->()<-B-()-A->()
The Cypher query:
MATCH path=allShortestPaths( (p:P{idp:123})-[rel:A|B*]-(p2:P{idp:124}) )
WHERE *some-predicate-on-path-or-rel*
RETURN path
is not a solution because the shortest good path may be longer than shortest bad paths.
Q1: Can this problem be solved by some Cypher query?
I can solve my problem with the embedded Java Neo4J API:
GraphDatabaseService graphDb = new GraphDatabaseFactory().newEmbeddedDatabase("db/store/dir/path");
TraversalDescription td = graphDb.traversalDescription()
.breadthFirst()
.evaluator(Evaluators.toDepth(max_depth))
.evaluator(Evaluators.endNodeIs(Evaluation.INCLUDE_AND_PRUNE, Evaluation.EXCLUDE_AND_CONTINUE, endNode))
.evaluator(new DoubleB_PruneEvaluator());
static class DoubleB_PruneEvaluator implements Evaluator {
@Override
public Evaluation evaluate(final Path path) {
Iterator<Relationship> lRels = path.reverseRelationships().iterator();
if (lRels.hasNext() && lRels.next().isType(MyRelTypes.B)) {
if (lRels.hasNext() && lRels.next().isType(MyRelTypes.B))
return Evaluation.EXCLUDE_AND_PRUNE;
}
return Evaluation.INCLUDE_AND_CONTINUE;
}
}
Q2: Is this solution efficient? Or how can it be improved?
But my application is written in PHP and interacts with the Neo4j server via the REST protocol.
Q3: How can I run this solution via some REST query?
Since nobody has answered me, I will try to answer myself.
A1: This problem cannot be solved by a standard Cypher query. (My Neo4j version is 3.1.1.)
A2: This solution is not very efficient, for several reasons:
The standard shortestPath function is implemented using the more efficient bidirectional BFS (whereas this traversal is a plain BFS).
This traversal description has no stop condition for when a solution is found; the traversal will continue until the maximum depth.
In addition, this solution finds only one path; other paths of the same length will not be found.
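One partial, client-side mitigation for the last two points (just a sketch, not the bidirectional search that shortestPath uses): a breadth-first traverser yields paths in non-decreasing length, so the caller can stop iterating once the first depth with hits is exhausted and collect every path of that length. The names td and startNode below stand for the traversal description and start node from the snippet above.
// Collect all shortest good paths and stop early (imports: java.util.*, org.neo4j.graphdb.*).
List<Path> shortestGoodPaths = new ArrayList<>();
int shortestDepth = -1;
for (Path p : td.traverse(startNode)) {
    if (shortestDepth < 0) {
        shortestDepth = p.length();      // first included path defines the shortest depth
    } else if (p.length() > shortestDepth) {
        break;                           // breadth-first order: only longer paths follow
    }
    shortestGoodPaths.add(p);
}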
A3: Java-coded solutions can be added to the server by extending Neo4j.
I solved my problem using user-defined procedures:
my/app/RelType.java:
package my.app;
import org.neo4j.graphdb.*;
public enum RelType implements RelationshipType {
A, B
}
my/app/DoubleB_PruneEvaluator.java:
package my.app;
import java.util.*;
import org.neo4j.graphdb.*;
import org.neo4j.graphdb.traversal.*;
public class DoubleB_PruneEvaluator implements Evaluator {
@Override
public Evaluation evaluate(final Path path) {
Iterator<Relationship> lRels = path.reverseRelationships().iterator();
if (lRels.hasNext() && lRels.next().isType(RelType.B)) {
if (lRels.hasNext() && lRels.next().isType(RelType.B))
return Evaluation.EXCLUDE_AND_PRUNE;
}
return Evaluation.INCLUDE_AND_CONTINUE;
}
}
my/app/Procedures.java:
package my.app;
import java.util.stream.Stream;
import org.neo4j.graphdb.*;
import org.neo4j.procedure.*;
import org.neo4j.graphdb.traversal.*;
public class Procedures {
@Context
public GraphDatabaseService db;
@Procedure
public Stream<PathHit> shortestWo2B(
@Name("from") Node fromNode,
@Name("to") Node toNode,
@Name("maxDepth") long maxDepth)
{
TraversalDescription td = db.traversalDescription()
.breadthFirst()
.relationships(RelType.A)
.relationships(RelType.B)
.evaluator(Evaluators.toDepth((int)maxDepth))
.evaluator(Evaluators.endNodeIs(Evaluation.INCLUDE_AND_PRUNE, Evaluation.EXCLUDE_AND_CONTINUE, toNode))
.evaluator(new DoubleB_PruneEvaluator());
return td.traverse(fromNode)
.stream()
.map( PathHit::new );
}
public static class PathHit {
public Path path;
public PathHit(Path path) {
this.path = path;
}
}
}
Doc: https://neo4j.com/docs/java-reference/3.1/javadocs/index.html?org/neo4j/procedure/Procedure.html
A few words about compiling and installing the plugin:
As a beginner in Java, I decided that tools such as Eclipse and Maven are too heavyweight; I prefer to use plain javac & jar:
$ export CLASSPATH=/path/to/neo4j-install-dir/lib/*:.
$ javac my/app/*.java
$ jar -cf my-neo4j-plugin.jar my/app/*.class
$ cp my-neo4j-plugin.jar /path/to/neo4j-install-dir/plugins/
$ /path/to/neo4j-install-dir/bin/neo4j restart
Now we can run the Cypher query:
MATCH (p1:P{idp:123})
MATCH (p2:P{idp:124})
CALL my.app.shortestWo2B(p1,p2,100) YIELD path
RETURN path;
I have an OWL ontology file as RDF, want to store my data in a TDB store, and want to use reasoning. Actually, this sounds simple so far :)
But here is the point where I'm confused:
I created a TDB store and added some statements via SPARQL. Then I tried to load the TDB store via a model and an OWL reasoner:
OntModelSpec ontModelSpec = OntModelSpec.OWL_MEM;
Reasoner reasoner = ReasonerRegistry.getOWLReasoner();
ontModelSpec.setReasoner(reasoner);
Model schemaModel = FileManager.get().loadModel("D:/Users/jim/Desktop/ontology/schema.rdf");
OntModel schema = ModelFactory.createOntologyModel( ontModelSpec, schemaModel);
Location location = new Location("D:/Users/jim/Desktop/jena-fuseki-0.2.5/DB");
Dataset dataset = TDBFactory.createDataset(location);
Model model = dataset.getDefaultModel();
OntModel ontModel = ModelFactory.createOntologyModel(ontModelSpec, model);
When I now create new resources via the API, they are not stored in the TDB store, and I'm not able to see the statements I have added via SPARQL?!
The SPARQL query shows me only the entries I've added with SPARQL:
QueryExecution qExec = QueryExecutionFactory.create(
StrUtils.strjoinNL("SELECT ?s ?p ?prop",
"WHERE {?s ?p ?prop}"),
dataset) ;
ResultSet rs = qExec.execSelect() ;
try {
ResultSetFormatter.out(rs) ;
} finally { qExec.close() ; System.out.println("closed connection");}
and this returns only the Resource added with the API
System.out.print("instance: " + ontModel.getResource(NS + "TestItem"));
And when I call this:
ExtendedIterator<Statement> iter = ontModel.listStatements();
I get the following exception:
org.openjena.atlas.lib.InternalErrorException: Invalid id node for subject (null node): ([0000000000000067], [0000000000000093], [00000000000000C8])
Is someone able to explain this behavior? Or could someone please give me a hint on how to separate schema and data with TDB in the right way when using an OntModel?
Partial answer:
org.openjena.atlas.lib.InternalErrorException: Invalid id node for subject (null node): ([0000000000000067], [0000000000000093], [00000000000000C8])
You are using TDB without transactions - try adding TDB.sync before exiting to flush changes to the disk.
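A minimal sketch of what that might look like, assuming the legacy com.hp.hpl.jena packages used in the question (TDB.sync for non-transactional use; the transaction API as an alternative if your TDB version supports it):
// Non-transactional usage: flush TDB's buffered changes to disk before exiting.
import com.hp.hpl.jena.tdb.TDB;

TDB.sync(dataset);

// Alternative, if the TDB version in use supports transactions:
dataset.begin(com.hp.hpl.jena.query.ReadWrite.WRITE);
try {
    Model model = dataset.getDefaultModel();
    // ... create resources / add statements via the API here ...
    dataset.commit();
} finally {
    dataset.end();
}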
I'm using Spring Data for Neo4j to access Neo4j graph. I have a UserRepository with an annotated query as below:
package com.abc.graph.repository;
import java.util.List;
import java.util.Map;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.Pageable;
import org.springframework.data.neo4j.annotation.Query;
import org.springframework.data.neo4j.repository.GraphRepository;
import org.springframework.data.neo4j.repository.NamedIndexRepository;
import org.springframework.data.neo4j.repository.RelationshipOperationsRepository;
import com.abc.graph.entity.User;
public interface UserRepository extends GraphRepository<User>, NamedIndexRepository<User>,
RelationshipOperationsRepository<User> {
public User findById(String id);
public Page<User> findByNameLike(String name, Pageable page);
#Query("START user=node:User(id={0}) " +
"MATCH user-[:VISITS]->(location)<-[:VISITS]-(similar_user) " +
"RETURN similar_user, collect(location) as locations, count(*) as count " +
"ORDER BY count desc ")
public List<Map<String, Object>> findSimilarUsersByPlaceVisited(String userId);
}
What I am trying to retrieve from the graph is a list of users who have been to similar places and, for each user, the common places they have been to. The method will return a list of Map<String, Object>. Each map will contain keys like similar_user, locations and count.
From debug statement I can see that similar_user is an instance of org.neo4j.rest.graphdb.entity.RestNode. Is there any way to convert it to my Spring Data node entity which is com.abc.graph.entity.User?
You can do it manually by using:
template.createEntityFrom[Stored]State(userNode[, User.class])
Or you can define as the result an Iterable or Collection of an interface annotated with @MapResult, with getters for the three columns. It either maps the getter names to result columns automatically, or you can provide a name to map to.
@MapResult
interface SimilarUser {
@ResultColumn("count") int getCount();
@ResultColumn("similar_user") User getUser();
@ResultColumn("locations") Collection<Location> getLocations();
}