Flume Custom Interceptor is not Working - flume

I want to replace each IP address in my data with its masked form. This is done in the "backup" part of my Flume agent (see below).
In this configuration there are 2 channels: the first channel dumps data to HBase, while the second one is used for backup:
# Agent a1: two Avro sources (r1, r2), two file channels and one HBase sink.
a1.sources = r1 r2
a1.channels = channel1 Backup_channel
a1.sinks = FSink
# NOTE(review): "handler" is a property of the HTTP source; an avro source presumably ignores it - confirm.
a1.sources.r1.handler = com.flume.handler.JSONHandler
a1.sources.r1.type = avro
a1.sources.r1.bind = x.x.x.x
a1.sources.r1.port = 10008
a1.sources.r2.handler = com.flume.handler.JSONHandler
a1.sources.r2.type = avro
a1.sources.r2.bind = x.x.x.x
a1.sources.r2.port = 10009
a1.sources.r2.interceptors = i1
# A custom interceptor is configured through its Builder class, not through the
# Interceptor implementation itself; a nested Builder is addressed with '$'.
a1.sources.r2.interceptors.i1.type = com.flume.interceptor.DcInterceptor$CounterInterceptorBuilder
a1.channels.channel1.type = file
a1.channels.channel1.checkpointDir = /root/flume/channels/Livechannel/checkpoint
a1.channels.channel1.dataDirs = /root/flume/channels/Livechannel/data
a1.sinks.FSink.type = hbase
a1.sinks.FSink.table = Temp_Test
a1.sinks.FSink.batchSize = 300
a1.sinks.FSink.columnFamily = T
a1.sinks.FSink.serializer = com.flume.sink.TestTP
a1.sources.r1.channels = channel1
a1.sources.r2.channels = Backup_channel
# NOTE(review): no sink in this configuration drains Backup_channel - confirm one is defined elsewhere.
a1.channels.Backup_channel.type = file
a1.channels.Backup_channel.checkpointDir = /data/disk/flume/backup/checkpoint
a1.channels.Backup_channel.dataDirs = /data/disk/flume/backup/data
a1.sinks.FSink.channel = channel1
The following is my custom Java interceptor code. It implements the intercept method, which gets an IP address from the body, calculates its masked form and then writes it back to the body. But somehow it's not working:
public class DcInterceptor implements Interceptor {
private byte[] jsonTestBeans;
private final Type listType = new TypeToken < List < TestBeans >> () {}.getType();
#Override
public void close() {
// TODO Auto-generated method stub
}
#Override
public void initialize() {
// TODO Auto-generated method stub
new Logger();
}
#Override
public Event intercept(Event event) {
// TODO Auto-generated method stub
List < Row > actions = new ArrayList < Row > ();
this.jsonTestBeans = event.getBody();
Logger.logger.debug("In Interceptor");
System.out.println("In Interceptor");
Gson _Gson = new Gson();
String jsonstr = "";
try {
jsonstr = new String(jsonTestBeans, "UTF-8");
} catch (Exception e) {
// TODO: handle exception
Logger.logger.error(e.getMessage() + "In Interceptor");
jsonstr = new String(jsonTestBeans);
}
List < TestBeans > TestBeanss = _Gson.fromJson(jsonstr, listType);
System.out.println("Json String :" + jsonstr);
List < String > gTouch = new ArrayList < String > ();
for (TestBeans TestBeans: TestBeanss) {
String str = TestBeans.getIp();
Logger.logger.debug("IP : " + str);
String st = (str.substring(0, str.lastIndexOf(".") + 1) + "x");
Logger.logger.debug("Mask IP : " + st);
TestBeans.setRemoteIp(st);
}
event.setBody(_Gson.toJson(TestBeanss).getBytes());
Logger.logger.debug("Interceptor Ends");
return event;
}
#Override
public List < Event > intercept(List < Event > events) {
// TODO Auto-generated method stub
System.out.println("In List Interceptor");
Logger.logger.debug("In List Interceptor");
for (Event event: events) {
intercept(event);
}
return events;
}
public static class CounterInterceptorBuilder implements Interceptor.Builder {
private Context ctx;
#Override
public Interceptor build() {
Logger.logger.debug("In Interceptor Build");
System.out.println("In Build Interceptor");
return new DcInterceptor();
}
#Override
public void configure(Context context) {
this.ctx = context;
}
}

At least, I can see:
The configuration lines regarding your interceptor refer to an agent called ECircleTp_Test, while the rest of the configuration refer to a1.
You have configured com.flume.interceptor.DcInterceptor2, but the interceptor class you have developed is called DcInterceptor (without the final 2).
You have configured com.flume.interceptor.DcInterceptor2 as the fully qualified class name of your custom interceptor. Nevertheless, the code of the interceptor does not declare any package for the DcInterceptor(2) class.

Related

Custom Batch filter in weka

I am trying to build a custom batch filter that extends SimpleBatchFilter. However, I am experiencing the problem of running it second time to get an inverted output. Here is the relevant code and the error I am getting after both runs are completed:
Exception in thread "main" java.lang.IndexOutOfBoundsException: Index: 79, Size: 79
at java.util.ArrayList.rangeCheck(ArrayList.java:653)
at java.util.ArrayList.get(ArrayList.java:429)
at weka.core.Attribute.addStringValue(Attribute.java:994)
at weka.core.StringLocator.copyStringValues(StringLocator.java:155)
at weka.core.StringLocator.copyStringValues(StringLocator.java:91)
at weka.filters.Filter.copyValues(Filter.java:373)
at weka.filters.Filter.push(Filter.java:290)
at weka.filters.SimpleBatchFilter.batchFinished(SimpleBatchFilter.java:266)
at weka.filters.Filter.useFilter(Filter.java:667)
at likeability.Main.main(Main.java:30)
And here is the relevant code:
public class TestFilter extends SimpleBatchFilter {
private Attribute a;
private Attribute b;
private int sampleSizePercent = 15;
private boolean invert = false;
private int seed = 1;
#Override
protected Instances process(Instances inst) throws Exception {
ArrayList<Instances> partitionsA = partition(inst, a);
ArrayList<Instances> partitions = new ArrayList<Instances>();
for(Instances data: partitionsA) {
partitions.addAll(partition(data, b));
}
return getTestSet(partitions);
}
/*
* Partitions the data so that there's only one nominal value of the
* attribute a in one partition.
*/
private ArrayList<Instances> partition(Instances data, Attribute att) throws Exception {
ArrayList<Instances> instances = new ArrayList<Instances>();
for (int i = 0; i < att.numValues(); i++){
RemoveWithValues rm = new RemoveWithValues();
rm.setAttributeIndex(Integer.toString(att.index()+1));
rm.setInvertSelection(true);
rm.setNominalIndices(Integer.toString(i+1));
rm.setInputFormat(data);
instances.add(Filter.useFilter(data, rm));
}
return instances;
}
private Instances getTestSet(List<Instances> insts) throws Exception {
Instances output = new Instances(insts.get(0), 0);
for(Instances inst: insts) {
Resample filter = new Resample();
filter.setRandomSeed(seed);
filter.setNoReplacement(true);
filter.setInvertSelection(invert);
filter.setSampleSizePercent(sampleSizePercent);
filter.setInputFormat(inst);
Instances curr = Filter.useFilter(inst, filter);
System.out.println(inst.size() + " " + curr.size());
output.addAll(curr);
}
return output;
}
#Override
protected Instances determineOutputFormat(Instances arg) throws Exception {
return new Instances(arg, 0);
}
#Override
public String globalInfo() {
return "A filter which partitions the data so that each partition contains"
+ " only instances with one value of attribute a and b, then takes "
+ "a random subset of values from each partition and merges them to"
+ " produce the final set.";
}
public Capabilities getCapabilities() {
Capabilities result = super.getCapabilities();
result.enableAllAttributes();
result.enableAllClasses();
result.enable(Capability.NO_CLASS); // filter doesn't need class to be set
return result;
}
//Main and getters and setters
}
And this is how I call it:
// First pass: partition by "gender" and "age" and sample with the default (non-inverted) selection.
TestFilter filter = new TestFilter();
filter.setA(data.attribute("gender"));
filter.setB(data.attribute("age"));
filter.setInputFormat(data);
Instances test = Filter.useFilter(data, filter);
// Second pass: reuse the same filter with the selection inverted; the input format is set again first.
filter.setInvert(true);
filter.setInputFormat(data);
Instances train = Filter.useFilter(data, filter);
It seems quite stupid to me that I need those two lines between the calls. I suspect I should use isBatchFinished(); does that mean I have to implement it by extending BatchFilter rather than SimpleBatchFilter? It would also be helpful to see some successful implementations, since the only ones I could find were the ones in the WEKA manual.
I solved it by extending a Filter instead and changing the process function to batchFinished(). I am posting this answer as I have not found a custom filter example anywhere else.
// Replacement for process(): partitions the input format's data by attribute a, then by attribute b.
#Override
public boolean batchFinished() throws Exception {
// Once isFirstBatchDone() reports true, switch to the inverted selection.
if(isFirstBatchDone()) {
invert = true;
}
if (getInputFormat() == null)
throw new NullPointerException("No input instance format defined");
Instances inst = getInputFormat();
ArrayList<Instances> partitionsA = partition(inst, a);
ArrayList<Instances> partitions = new ArrayList<Instances>();
for(Instances data: partitionsA) {
partitions.addAll(partition(data, b));
}
// NOTE(review): the snippet stops here - the use of `partitions` (presumably a call to
// getTestSet), the return statement and the closing brace are not shown.
/**
 * Resamples every partition with the configured seed, percentage and inversion flag
 * and pushes each sampled instance to the filter's output.
 */
private void getTestSet(List<Instances> insts) throws Exception {
    for (Instances partition : insts) {
        Resample sampler = new Resample();
        sampler.setRandomSeed(seed);
        sampler.setNoReplacement(true);
        sampler.setInvertSelection(invert);
        sampler.setSampleSizePercent(sampleSizePercent);
        // The input format is set last, after all options.
        sampler.setInputFormat(partition);

        Instances sampled = Filter.useFilter(partition, sampler);
        System.out.println(partition.size() + " " + sampled.size());
        sampled.forEach(this::push);
    }
}
#Override
public boolean setInputFormat(Instances arg) throws Exception {
super.setInputFormat(arg);
Instances outputFormat = new Instances(arg, 0);
setOutputFormat(outputFormat);
return true;
}

handleURI for http://AAA.BBB.CCC.DDD:8080/myapp/ uri: '' returns ambigious result (Vaadin 6)

In my Vaadin 6 application I sometimes get the following error:
SEVERE: Terminal error:
java.lang.RuntimeException: handleURI for http://AAA.BBB.CCC.DDD:8080/myapp/ uri: '' returns ambigious result.
at com.vaadin.ui.Window.handleURI(Window.java:432)
at com.vaadin.terminal.gwt.server.AbstractCommunicationManager.handleURI(AbstractCommunicationManager.java:2291)
at com.vaadin.terminal.gwt.server.CommunicationManager.handleURI(CommunicationManager.java:370)
at com.vaadin.terminal.gwt.server.AbstractApplicationServlet.handleURI(AbstractApplicationServlet.java:1099)
at com.vaadin.terminal.gwt.server.AbstractApplicationServlet.service(AbstractApplicationServlet.java:535)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:728)
According to the Vaadin source, it occurs in the following method:
/**
 * Offers the URI to every registered URI handler and returns the single stream produced.
 *
 * @param context     the base URL the request was made against
 * @param relativeUri the URI relative to {@code context}
 * @return the stream produced by the one handler that answered, or {@code null} if none did
 * @throws RuntimeException if more than one handler returns a stream
 */
public DownloadStream handleURI(URL context, String relativeUri) {
    if (uriHandlerList == null) {
        return null;
    }

    // Copy the handlers under the lock, then call them outside of it.
    final Object[] snapshot;
    synchronized (uriHandlerList) {
        snapshot = uriHandlerList.toArray();
    }

    DownloadStream found = null;
    for (final Object handler : snapshot) {
        final DownloadStream candidate =
                ((URIHandler) handler).handleURI(context, relativeUri);
        if (candidate == null) {
            continue;
        }
        if (found != null) {
            throw new RuntimeException("handleURI for " + context
                    + " uri: '" + relativeUri
                    + "' returns ambigious result.");
        }
        found = candidate;
    }
    return found;
}
I actually create a DownloadStream in a column generator (in order to display images in a table):
/**
 * Column generator that renders the {@code image} column as an {@code Embedded}
 * component backed by a {@code StreamResource} over the bean's image bytes.
 */
public class ImageColumnGenerator implements Table.ColumnGenerator {
    public final static String IMAGE_FIELD = "image";
    private static final Logger LOGGER = LoggerFactory.getLogger(ImageColumnGenerator.class);

    /**
     * @return an {@code Embedded} showing the item's image for the image column,
     *         {@code null} for any other column
     */
    public Object generateCell(final Table aTable, final Object aItemId, final Object aColumnId) {
        if (IMAGE_FIELD.equals(aColumnId)) {
            final BeanItem<UserProductImageBean> item =
                    (BeanItem<UserProductImageBean>) aTable.getItem(aItemId);

            // The image bytes are fetched from the bean each time the stream is requested.
            final StreamResource.StreamSource source = new StreamResource.StreamSource() {
                public InputStream getStream() {
                    final byte[] bytes = item.getBean().getImageData();
                    return new ByteArrayInputStream(bytes);
                }
            };

            final StreamResource resource = new StreamResource(
                    source,
                    item.getBean().getFileName(),
                    MyApplication.getInstance());
            LOGGER.debug("imageResource: " + resource);
            return new Embedded("", resource);
        }
        return null;
    }
}
beanItem.getBean().getImageData() is a byte array (byte[]) with image data, which I get from a web service.
MyApplication.getInstance() is defined as follows:
/**
 * Application class exposing the instance bound to the current thread.
 */
public class MyApplication extends Application implements ApplicationContext.TransactionListener {

    /** Per-thread holder of the application instance. */
    private static ThreadLocal<MyApplication> currentApplication = new ThreadLocal<MyApplication>();

    /**
     * @return the application stored for the calling thread, or {@code null} if none was set
     */
    public static MyApplication getInstance() {
        final MyApplication current = currentApplication.get();
        return current;
    }
}
What can I do in order to fix the aforementioned (severe) error?
Since nobody has answered yet: I'm not at all an expert in the code above, but try to find out which kinds of URLs trigger this error, and handle them before they are fed to DownloadStream.

Partial Unmarshalling of an XML using JAXB to skip some xmlElement

I want to unmarshal an XML file to java object using JAXB. The XML file is very large and contains some nodes which I want to skip in some cases to improve performance as these elements are non editable by client java program.
A sample XML is as follows:
<Example id="10" date="1970-01-01" version="1.0">
<Properties>...</Properties>
<Summary>...</Summary>
<RawData>
<Document id="1">...</Document>
<Document id="2">...</Document>
<Document id="3">...</Document>
------
------
</RawData>
<Location></Location>
<Title></Title>
----- // more elements
</Example>
I have two use cases:
unmarshal into Example object which contains Properties, Summaries, RawData etc. without skipping any RawData. (already done this part)
unmarshal into an Example object which excludes RawData. The content nested in RawData is very large, so I do not want to read it in this use case.
Now I want to unmarshal the XML such that RawData can be skipped. I have tried the technique provided at this link.
Using technique provided in above link also skips all elements which come after RawData.
I have fixed the issue with XMLEventReader with following code:
/**
 * {@link XMLEventReader} decorator that hides one element, together with its whole
 * subtree, from the consumer.
 *
 * <p>Filtering is applied by {@link #nextEvent()}, {@link #peek()} and {@link #next()}.
 * {@link #nextTag()} and {@link #getElementText()} delegate unchanged to the wrapped reader.
 */
public class PartialXmlEventReader implements XMLEventReader {
    private final XMLEventReader reader;
    private final QName qName;

    /**
     * Number of currently open elements named {@code qName}; 0 means "not skipping".
     * A counter, not a flag, so that a nested element with the same name does not end
     * the skipping early.
     */
    private int skipDepth = 0;

    /**
     * @param reader  the reader to wrap
     * @param element the name of the element to suppress
     */
    public PartialXmlEventReader(final XMLEventReader reader, final QName element) {
        this.reader = reader;
        this.qName = element;
    }

    @Override
    public String getElementText() throws XMLStreamException {
        return reader.getElementText();
    }

    @Override
    public Object getProperty(final String name) throws IllegalArgumentException {
        return reader.getProperty(name);
    }

    @Override
    public boolean hasNext() {
        return reader.hasNext();
    }

    /** Returns the next event that is not part of the suppressed element. */
    @Override
    public XMLEvent nextEvent() throws XMLStreamException {
        discardSkipped();
        return reader.nextEvent();
    }

    @Override
    public XMLEvent nextTag() throws XMLStreamException {
        return reader.nextTag();
    }

    /** Returns, without consuming it, the event that {@link #nextEvent()} would return. */
    @Override
    public XMLEvent peek() throws XMLStreamException {
        discardSkipped();
        return reader.peek();
    }

    /** Iterator view of {@link #nextEvent()}; stream errors surface as NoSuchElementException. */
    @Override
    public Object next() {
        try {
            return nextEvent();
        } catch (XMLStreamException e) {
            java.util.NoSuchElementException wrapped =
                    new java.util.NoSuchElementException(e.getMessage());
            wrapped.initCause(e);
            throw wrapped;
        }
    }

    @Override
    public void remove() {
        reader.remove();
    }

    @Override
    public void close() throws XMLStreamException {
        reader.close();
    }

    /** Consumes upcoming events for as long as they belong to the suppressed element. */
    private void discardSkipped() throws XMLStreamException {
        XMLEvent upcoming = reader.peek();
        // peek() returns null at the end of the stream; stop instead of dereferencing it.
        while (upcoming != null && isEof(upcoming)) {
            reader.nextEvent();
            upcoming = reader.peek();
        }
    }

    /**
     * Decides whether {@code e} must be hidden and updates the nesting depth. The state
     * only changes when the answer is {@code true}, so asking twice about an event that
     * is kept is harmless.
     */
    private boolean isEof(final XMLEvent e) {
        if (e.isStartElement() && e.asStartElement().getName().equals(qName)) {
            skipDepth++;
            return true;
        }
        if (skipDepth > 0 && e.isEndElement() && e.asEndElement().getName().equals(qName)) {
            skipDepth--;
            return true;
        }
        return skipDepth > 0;
    }
}
While Unmarshalling just pass this eventReader to the unmarshal method
// Unmarshal the file while hiding the <RawData> subtree from JAXB.
final JAXBContext context = JAXBContext.newInstance(classes);
final Unmarshaller um = context.createUnmarshaller();
final QName qName = new QName("RawData");
final XMLInputFactory xif = XMLInputFactory.newInstance();
final Example example;
// try-with-resources closes the file reader on every path.
try (Reader reader = new BufferedReader(new FileReader(xmlFile))) {
    final XMLEventReader xmlEventReader = xif.createXMLEventReader(reader);
    example = (Example) um.unmarshal(new PartialXmlEventReader(xmlEventReader, qName));
}
I hope this would help
// Hand-written StAX alternative: fill an Example field by field and skip <RawData>.
Example example = null;
// try-with-resources closes the file stream on every path.
try (InputStream in = new FileInputStream("myXml")) {
    // First create a new XMLInputFactory, then set up a new eventReader
    XMLInputFactory inputFactory = XMLInputFactory.newInstance();
    XMLEventReader eventReader = inputFactory.createXMLEventReader(in);
    // Read the XML document
    while (eventReader.hasNext()) {
        XMLEvent event = eventReader.nextEvent();
        if (event.isStartElement()) {
            StartElement startElement = event.asStartElement();
            String elementName = startElement.getName().getLocalPart();
            if (elementName.equals("Example")) {
                // If we have an Example element we create a new example and copy
                // the date and id attributes into it.
                example = new Example();
                Iterator<?> attributes = startElement.getAttributes();
                while (attributes.hasNext()) {
                    Attribute attribute = (Attribute) attributes.next();
                    String attributeName = attribute.getName().toString();
                    if (attributeName.equals("date")) {
                        example.setDate(attribute.getValue());
                    } else if (attributeName.equals("id")) {
                        example.setId(attribute.getValue());
                    }
                }
            } else if (elementName.equals("Properties")) {
                // getElementText() reads up to the matching end tag and returns "" for
                // an empty element, where asCharacters() would fail on the end tag.
                example.setProperites(eventReader.getElementText());
            } else if (elementName.equals("Summary")) {
                example.setSummary(eventReader.getElementText());
            } else if (elementName.equals("RawData")) {
                // Skip the whole subtree: consume events until the matching end tag.
                int depth = 1;
                while (depth > 0 && eventReader.hasNext()) {
                    XMLEvent skipped = eventReader.nextEvent();
                    if (skipped.isStartElement()) {
                        depth++;
                    } else if (skipped.isEndElement()) {
                        depth--;
                    }
                }
            } else if (elementName.equals("Location")) {
                example.setLocation(eventReader.getElementText());
            }
            // read and add other elements that can be added
        } else if (event.isEndElement()) {
            // If we reach the end of an example element/tag i.e closing tag
            EndElement endElement = event.asEndElement();
            if (endElement.getName().getLocalPart().equals("Example")) {
                //do something
            }
        }
    }
} catch (java.io.IOException | XMLStreamException e) {
    // Surface the failure instead of silently continuing with a half-filled object.
    throw new IllegalStateException("Failed to read myXml", e);
}

Understanding mahout classification output

I have trained a Mahout model for three categories (Category_A, Category_B, Category_C) using the 20newsgroups example. Now I want to classify my documents using this model. Can somebody help me understand the output I am getting from this model?
Here is my output
{0:-2813549.8786637094,1:-2651723.736745838,2:-2710651.7525975127}
According to the output, the category of the document is 1, but the expected category is 2. Am I on the right track, or is something missing in my code?
/**
 * Loads a trained naive Bayes model and prints the per-category scores for the
 * document stored in a fixed input file.
 */
public class NaiveBayesClassifierExample {

    /**
     * Materializes the model at {@code strModelPath}, scores {@code v} and prints the
     * score vector followed by the index of the highest score.
     *
     * @param strModelPath path of the trained model
     * @param v            the document vector to classify
     * @throws IOException if the model cannot be read
     */
    public static void loadClassifier(String strModelPath, Vector v)
            throws IOException {
        Configuration conf = new Configuration();
        NaiveBayesModel model = NaiveBayesModel.materialize(new Path(strModelPath), conf);
        AbstractNaiveBayesClassifier classifier = new StandardNaiveBayesClassifier(model);
        Vector scores = classifier.classifyFull(v);
        System.out.println(scores.asFormatString());
        System.out.println(scores.maxValueIndex());
    }

    /**
     * Tokenizes the input file and encodes the tokens into a normalized sparse vector.
     *
     * <p>NOTE(review): the encoding used here presumably has to match the one the model
     * was trained with for the scores to be meaningful - confirm against the training job.
     *
     * @return the normalized document vector
     * @throws IOException if tokenizing fails
     */
    public static Vector createVect() throws IOException {
        FeatureVectorEncoder encoder = new StaticWordValueEncoder("text");
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        String inputData = readData();
        StringReader in = new StringReader(inputData);
        TokenStream ts = analyzer.tokenStream("body", in);
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        Vector v1 = new RandomAccessSparseVector(100000);
        while (ts.incrementToken()) {
            char[] termBuffer = termAtt.buffer();
            int termLen = termAtt.length();
            String w = new String(termBuffer, 0, termLen);
            encoder.addToVector(w, 1.0, v1);
        }
        // normalize() returns a new vector and leaves the receiver unchanged, so the
        // result has to be returned rather than discarded.
        return v1.normalize();
    }

    /**
     * Reads the input file into one string, keeping a line break after every line so
     * that words on adjacent lines are not merged into a single token.
     *
     * @return the file content; whatever was read so far if reading fails
     */
    private static String readData() {
        StringBuilder results = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(new FileReader("c:\\inputFile.txt"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                results.append(line).append('\n');
            }
        } catch (IOException ex) {
            // Best effort, as in the original: report the error and return what was read.
            ex.printStackTrace();
        }
        return results.toString();
    }

    public static void main(String[] args) throws IOException {
        Vector v = createVect();
        String mp = "E:\\Final_Model\\model";
        loadClassifier(mp, v);
    }
}

Which MBeans to use (and how) to programmatically determine memory lows for app (CQ5) deployed inside Weblogic

I have to write a standalone Java app that monitors CQ5, deployed inside Weblogic (especially memory usage).
I was able to connect to the Domain Runtime Server in weblogic, using the class below (as found in the docs).
Now, I want to know which MBeans I need to monitor memory lows, so I can fire an event whenever a certain threshold is being hit.
Can any of you give me some insight? This is a pure JMX / Java question, unrelated to CQ.
I am trying to programmatically recreate what Jconsole already does. But I need it programmatically because I need to talk to an external API in case certain thresholds are being hit.
/**
 * Connects to the WebLogic Domain Runtime MBean Server, lists all registered MBeans
 * and prints the name and state of every server runtime.
 */
public class PrintServerState {
    private static MBeanServerConnection connection;
    private static JMXConnector connector;
    private static final ObjectName service;

    static {
        try {
            service = new ObjectName("com.bea:Name=DomainRuntimeService,Type=weblogic.management.mbeanservers.domainruntime.DomainRuntimeServiceMBean");
        } catch (MalformedObjectNameException e) {
            // The name is a constant, so this cannot happen; keep the cause anyway.
            throw new AssertionError(e.getMessage(), e);
        }
    }

    /*
     * Initialize connection to the Domain Runtime MBean Server
     * and print the names of all MBeans it exposes.
     */
    public static void initConnection(String hostname, String portString,
            String username, String password) throws IOException,
            MalformedURLException {
        String protocol = "t3";
        int port = Integer.parseInt(portString);
        String jndiroot = "/jndi/";
        String mserver = "weblogic.management.mbeanservers.domainruntime";
        JMXServiceURL serviceURL = new JMXServiceURL(protocol, hostname,
                port, jndiroot + mserver);
        Hashtable<String, String> h = new Hashtable<String, String>();
        h.put(Context.SECURITY_PRINCIPAL, username);
        h.put(Context.SECURITY_CREDENTIALS, password);
        h.put(JMXConnectorFactory.PROTOCOL_PROVIDER_PACKAGES,
                "weblogic.management.remote");
        connector = JMXConnectorFactory.connect(serviceURL, h);
        connection = connector.getMBeanServerConnection();
        System.out.println("***************** get mbean count ************************* " + connection.getMBeanCount());
        Set<ObjectName> mbeans = connection.queryNames(null, null);
        for (ObjectName mbeanName : mbeans) {
            System.out.println(mbeanName);
        }
        System.out.println("********************** ---- ***********************");
    }

    /*
     * Return the ServerRuntimes attribute of the domain runtime service as an array
     * of MBean names.
     */
    public static ObjectName[] getServerRuntimes() throws Exception {
        return (ObjectName[]) connection.getAttribute(service,
                "ServerRuntimes");
    }

    /*
     * Iterate through ServerRuntimeMBeans and get the name and state
     */
    public void printNameAndState() throws Exception {
        ObjectName[] serverRT = getServerRuntimes();
        System.out.println("got server runtimes");
        for (ObjectName runtime : serverRT) {
            String name = (String) connection.getAttribute(runtime, "Name");
            // Read the "State" attribute: the value is printed as the server state.
            String state = (String) connection.getAttribute(runtime, "State");
            System.out.println("Server name: " + name + ". Server state: "
                    + state);
        }
    }

    /**
     * @param args hostname, port, username, password
     * @throws IllegalArgumentException if fewer than four arguments are given
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 4) {
            throw new IllegalArgumentException(
                    "Usage: PrintServerState <hostname> <port> <username> <password>");
        }
        String hostname = args[0];
        String portString = args[1];
        String username = args[2];
        String password = args[3];
        PrintServerState s = new PrintServerState();
        System.out.println("hostname " + hostname);
        System.out.println("portString " + portString);
        System.out.println("username " + username);
        // The password is deliberately not echoed.
        System.out.println("password ********");
        try {
            initConnection(hostname, portString, username, password);
            System.out.println("**************************************************");
            s.printNameAndState();
        } finally {
            // Close the connector even when connecting or querying failed.
            if (connector != null) {
                connector.close();
            }
        }
    }
}
Would this help -
# WLST (Jython) commands. NOTE(review): assumes an active WLST session and a variable
# managedServerName holding the target server's name - confirm.
domainRuntime()
# eval('managedServerName') evaluates that string as an expression, i.e. it yields the variable's value.
cd('/ServerRuntimes/' + eval('managedServerName') + '/JVMRuntime/' + eval('managedServerName'))
# Read both heap figures from cmo and keep them as strings.
heapFreeCurrentPerOld = str(cmo.getHeapFreePercent())
heapFreeCurrentValOld = str(cmo.getHeapFreeCurrent())

Resources