Derivative node in Kapacitor batch - InfluxDB

I am using the derivative node to calculate bandwidth utilization of network devices; below is the script.
I am using a where clause because I want an alert for specific interfaces on a specific IP.
// database
var database = 'router'
// measurement from where data is coming
var measurement = 'cisco_router'
// RP from where data is coming
var RP = 'autogen'
// which influx cluster to use
var clus = 'network'
// durations
var period = 7m
var every = 10s
// alerts
var crit = 320
var alertName = 'cisco_router_bandwidth_alert'
var triggerType = 'threshold'
batch
    |query(''' SELECT (mean("bandwidth_in") * 8) as "value" FROM "router"."autogen"."cisco_router" where host = '10.1.11.1' and ( interface_name = 'GigabitEthernet0/0/0' or interface_name = 'GigabitEthernet0/0/1') ''')
        .cluster('network')
        .period(7m)
        .every(6m)
        .groupBy(*)
    |derivative('value')
        .unit(1s)
        .nonNegative()
        .as('value')
    |alert()
        .crit(lambda: "value" > crit)
        .stateChangesOnly()
        .message(' {{.Level}} for {{ index .Tags "device_name" }} on Port {{ index .Tags "name" }} {{ .Time.Local.Format "2006.01.02 - 15:04:05" }} ')
        .details('''
<pre>
------------------------------------------------------------------
CLIENT NAME : XXXXXXXX
ENVIRONMENT : Prod
DEVICE TYPE : Router
CATEGORY : {{ index .Tags "type" }}
IP ADDRESS : {{ index .Tags "host" }}
DATE : {{ .Time.Local.Format "2006.01.02 - 15:04:05" }}
INTERFACE NAME : {{ index .Tags "name" }}
VALUE : {{ index .Fields "value" }}
SEVERITY : {{.Level}}
------------------------------------------------------------------
</pre>
        ''')
        .log('/tmp/chronograf/cisco_router_interface_alert.log')
        .levelTag('level')
        .idTag('id')
        .messageField('message')
        .email()
        .to('XXXXXXX')
    |influxDBOut()
        .database('chronograf')
        .retentionPolicy(RP)
        .measurement('alerts')
        .tag('alertName', alertName)
But it is not showing anything when I do kapacitor watch, and there are no errors in the logs.

derivative() and some other nodes, like stateDuration(), reset their state on each new batch query, as opposed to stream mode, where their state is kept the whole time.
This is because in batch mode these nodes are designed to track changes only inside the current batch of points.
Since your query returns a single point, there is no result from derivative().
Try moving the derivative into the query, and use the |httpOut() node to track results at each step; it is really helpful for understanding Kapacitor's logic.
Here is an example:
dbrp "telegraf"."autogen"
var q = batch
    |query('SELECT derivative(mean("bytes_recv"), 1s) AS "bytes_recv_1s" FROM "telegraf"."autogen"."net" WHERE time < now() AND "interface"=\'eth0\' GROUP BY time(10m) fill(none)')
        .period(10m)
        .every(30s)
        .align()
        .groupBy(time(10m))
        .fill('none')
    |last('bytes_recv_1s')
        .as('value')
    |httpOut('query')
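The data captured by |httpOut('query') can then be inspected over Kapacitor's HTTP API while the task runs; a minimal sketch, assuming the default port 9092 and a hypothetical task name net_batch (the last path segment matches the string passed to httpOut):
# hypothetical task name; adjust to the ID shown by `kapacitor list tasks`
curl -s "http://localhost:9092/kapacitor/v1/tasks/net_batch/query"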
Note, there are bugs associated with query parsing that require specifying GROUP BY in both the query and the TICKscript:
https://github.com/influxdata/kapacitor/issues/971
https://github.com/influxdata/kapacitor/issues/622

Related

Can I pick a cell from every Google Sheet in a Folder?

I want to be able to pick say C3 from a list of Google spreadsheets in a folder.
I have a bunch of structurally identical sheets, but I'd like to be able to provide a sum of the values in C3 across say a hundred sheets in a directory.
Ultimately, it would be great to highlight the largest or smallest value of C3 in a directory.
This could be useful in many places where you want to aggregate data across files.
SUGGESTION
If you have hundreds of Google spreadsheet files in a Google Drive folder, I agree with #player0 that it is best to use a script. With Apps Script, you can:
Automate the process of iterating through the Spreadsheet files in your Drive folder.
Filter only the Google Spreadsheet type (e.g. if you have a bunch of different file types inside).
Get the range data & process them the way you want.
See this sample below that was derived from existing resources:
Script:
function readSheetsInAFolder() {
  //FOLDER_ID is your drive folder ID
  var query = '"FOLDER_ID" in parents and trashed = false and ' +
              'mimeType = "application/vnd.google-apps.spreadsheet"';
  var range = "C3"; //The range to look for on every Spreadsheet file in the Drive folder
  var files, pageToken;
  var finalRes = [];
  do {
    files = Drive.Files.list({
      q: query,
      maxResults: 100,
      pageToken: pageToken
    });
    files.items.forEach(sheet => {
      finalRes.push(viewRangeValue(range, sheet.id));
    })
    pageToken = files.nextPageToken;
  } while (pageToken);
  const arrSum = array =>
    array.reduce(
      (sum, num) => sum + (Array.isArray(num) ? arrSum(num) : num * 1),
      0
    );
  var max = Math.max.apply(null, finalRes.map(function(row){ return Math.max.apply(Math, row); })); //Gets the largest number
  var min = Math.min.apply(null, finalRes.map(function(row){ return Math.min.apply(Math, row); })); //Gets the smallest number
  var sum = arrSum(finalRes); // Gets the sum
  console.log('RANGE VALUES: %s \nRANGE: %s \nTOTAL SHEET(s) FOUND: %s \n________________\nSUM OF VALUES: %s \nLargest Value: %s \nSmallest Value: %s', finalRes, range, files.items.length, sum, max, min);
}

function viewRangeValue(range, sheetID) {
  var sid = sheetID;
  var rn = range;
  var parms = { valueRenderOption: 'UNFORMATTED_VALUE', dateTimeRenderOption: 'SERIAL_NUMBER' };
  var res = Sheets.Spreadsheets.Values.get(sid, rn, parms);
  return res.values.map(num => { return parseInt(num); });
}
Demonstration:
Sample test Drive folder with 3 test Spreadsheet files, where the C3 cell on each of the 3 files contains the value 0, 10 or 6.
In the Apps Script editor, I've added the Drive and Sheets APIs under Services.
Result after running the script: the log shows the range values, the number of sheets found, the sum, and the largest and smallest values.
Resources:
Advanced Drive Service
Drive API Files: list
Sheets API spreadsheets.values.get
Max Value of an array

How to get VWAP using DolphinDB TimeSeriesEngine or ReactiveStateEngine

I am getting live tick data consisting of Time, Symbol Name, Last Traded Price, Cumulative Volume (Daily).
Now, how do I get VWAP using 1) a custom function, 2) TimeSeriesEngine, or 3) ReactiveStateEngine with DolphinDB? Please help. The necessary code is below.
This is the stream table for getting ticks from Python:
t_colNames=`ts`symbol`price`vol`upd_tick
t_colTypes=`TIMESTAMP`SYMBOL`DOUBLE`DOUBLE`TIMESTAMP
This is the stream table to store 1-minute OHLC data:
ohlc_colNames=`ts`symbol`open`high`low`close`volume`tp`last_tick`upd_1m
ohlc_colTypes=`TIMESTAMP`SYMBOL`DOUBLE`DOUBLE`DOUBLE`DOUBLE`DOUBLE`DOUBLE`TIMESTAMP`TIMESTAMP
This is the 1-minute OHLC TimeSeriesEngine:
OHLC_sm1 = createTimeSeriesEngine(name="OHLC_sm1", windowSize=60000, step=60000, metrics=<[first(price) as open, max(price) as high, min(price) as low, last(price) as close, sum(vol) as volume, (max(price)+min(price)+last(price))/3 as tp, last(upd_tick) as last_tick, now() as upd_1m]>, dummyTable=tmp, outputTable=sm1 , timeColumn=`ts, useSystemTime=true, keyColumn=`symbol, updateTime=60000, useWindowStartTime=false);
This is the function to convert cumulative volume to volume:
def calcVolume(mutable dictVolume, mutable tsAggrOHLC, msg){
    t = select ts,symbol,price,vol,upd_tick from msg context by symbol limit -1
    update t set prevVolume = dictVolume[symbol]
    dictVolume[t.symbol] = t.vol
    tsAggrOHLC.append!(t.update!("vol", <vol-prevVolume>))
}
dictVol = dict(STRING, DOUBLE)
subscribeTable(tableName="t", actionName="OHLC_sm1", offset=0, handler=calcVolume{dictVol,OHLC_sm1}, msgAsTable=true, hash=1)
I recommend using ReactiveStateEngine to convert cumulative volume to volume and then connecting two engines in series. Here is an example:
tradesData = your_tick_data
//define Trade Table
x=tradesData.schema().colDefs
share streamTable(100:0, x.name, x.typeString) as Trade
//define OHLC outputTable
share streamTable(100:0, `datetime`symbol`open`high`low`close`volume`updatetime,[TIMESTAMP,SYMBOL,DOUBLE,DOUBLE,DOUBLE,DOUBLE,LONG,TIMESTAMP]) as OHLC
//1 min OHLC TimeSeriesEngine
tsAggrOHLC = createTimeSeriesAggregator(name="aggr_ohlc", windowSize=60000, step=60000, metrics=<[first(Price),max(Price),min(Price),last(Price),wavg(Price,Volume),now()]>, dummyTable=Trade, outputTable=OHLC, timeColumn=`Datetime, keyColumn=`Symbol)
//ReactiveStateEngine:convert cumulative volume to volume
rsAggrOHLC = createReactiveStateEngine(name="calc_vol", metrics=<[Datetime, Price, deltas(Volume) as Volume]>, dummyTable=Trade, outputTable=tsAggrOHLC, keyColumn=`Symbol)
//subscribe table and insert data into engines
subscribeTable(tableName="Trade", actionName="minuteOHLC2", offset=0, handler=append!{rsAggrOHLC}, msgAsTable=true)
replay(inputTables=tradesData, outputTables=Trade, dateColumn=`Datetime)
You can use user-defined functions in any of the engine's metrics.
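For the "custom function" part of the question, a per-window VWAP can also be written as a user-defined aggregate function and referenced in the metrics; a minimal sketch (the name myVWAP is hypothetical, and it assumes vol has already been converted from cumulative to per-tick volume):
// user-defined aggregate function: VWAP of one window (price weighted by traded volume)
defg myVWAP(price, vol){
    return wavg(price, vol)
}
// it could then appear in the engine's metrics, for example:
// metrics=<[first(price) as open, last(price) as close, myVWAP(price, vol) as vwap]>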

Kapacitor task with multiple percentiles

I want to aggregate the last minute of data from Telegraf with Kapacitor before putting it into InfluxDB, and I also need to calculate a few percentiles. So I wrote a simple TICKscript as a test:
var firstPerc = stream
    |from()
        .measurement('my_tmp_measurement_from_telegraf')
var secondPerc = stream
    |from()
        .measurement('my_tmp_measurement_from_telegraf')
firstPerc
    |join(secondPerc)
        .as('fp', 'sp')
    |percentile('fp.myAggVal', 50.0)
    |eval(lambda: "percentile")
        .as('50p')
    |percentile('sp.myAggVal', 90.0)
    |eval(lambda: "percentile")
        .as('90p')
    |window()
        .period(60s)
        .every(60s)
        .align()
    |influxDBOut()
        .database('myDBInInflux')
        .retentionPolicy('autogen')
In my database, I have only values for the 50th percentile, and I am not surprised by that since I use "percentile" in my eval, but I still cannot find any clue in the Kapacitor documentation about how to get the result I need.
Here is the "visual" result I am after:
time 50p 90p someOtherP's otherDataICanPropablyHandle
Halp!
You are using the same measurement stream (and the same data in it) twice, so the data gets popped. First, you should save the measurement stream:
var myStream = stream
    |from()
        .measurement('my_tmp_measurement_from_telegraf')
Next, define streams using the saved measurement. Define the proper grouping, evaluations, etc. here:
var firstPerc = myStream
    |percentile('myAggVal', 50.0)
    |eval(lambda: "percentile")
        .as('percentile')
    |window()
        .period(60s)
        .every(60s)
        .align()
var secondPerc = myStream
    |percentile('myAggVal', 90.0)
    |eval(lambda: "percentile")
        .as('percentile')
    |window()
        .period(60s)
        .every(60s)
        .align()
Finally, it's time to define the join stream:
var joinedStreams = firstPerc
    |join(secondPerc)
        .as('50', '90')
        .tolerance(1s)
        .streamName('measurementName')
    |influxDBOut()
        .database('myDBInInflux')
        .retentionPolicy('autogen')
        .create()
The output:
time 50.percentile 90.percentile
I strongly suggest using .tolerance(), which will group measurements within the same tolerance period.
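If the column names 50p and 90p from the question are preferred over 50.percentile and 90.percentile, an extra |eval() node placed between the join and the influxDBOut above could rename the fields; a minimal sketch, with the field names assumed from the .as('50', '90') prefixes:
    |eval(lambda: "50.percentile", lambda: "90.percentile")
        .as('50p', '90p')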

How can I maximize throughput in Docker and Akka HTTP?

I am building a specific jig for performance measurement. I have a load generator, boom (https://github.com/rakyll/boom). With this I can generate a pretty decent amount of load.
I also have a Docker image containing nginx as a load balancer, and two Akka-HTTP based REST servers. These do nothing except count hits (they always just return 200).
Running one of these servers stand-alone (outside the Docker) I have been able to get 1000 hits/second. Not sure if that's good or not. In this Docker configuration that figure drops to about 220 hits/second. I was kinda expecting, well... 2000 hits/second or thereabouts. Higher would even be better. I'd be happy if I can find a way to get 3-4K hits/sec with this arrangement.
I often get an error message like this:
[9549] Get http://192.168.99.100:9090/dispatcher?reply_to=foo: dial tcp 192.168.99.100:9090: socket: too many open files
I tried running my Docker container with --ulimit nofile=2048, but that didn't help. My application.conf for Akka is merely:
akka {
  loglevel = "ERROR"
  stdout-loglevel = "ERROR"
  http.host-connection-pool.max-open-requests = 512
}
The server code:
object Main extends App {
  implicit val system = ActorSystem()
  implicit val mat = ActorMaterializer()

  println(":: Starting Simulator on port " + args(0))
  Http().bindAndHandle(route, java.net.InetAddress.getLoopbackAddress.getHostAddress, args(0).toInt)

  var hits = 0
  var isTiming = false
  var numSec = 1

  lazy val route =
    get {
      path("dispatcher") {
        if (isTiming) hits += 1
        complete(StatusCodes.OK)
      } ~
      path("startTiming" / IntNumber) { sec =>
        isTiming = true
        hits = 0
        numSec = sec
        val timeUnit = FiniteDuration(sec, SECONDS)
        system.scheduler.scheduleOnce(timeUnit) { isTiming = false }
        complete(StatusCodes.OK)
      } ~
      path("tps") {
        val tps = hits / numSec * 2
        complete(s"""${args(0)}: TPS-$tps\n""")
      }
    }
}
Theory of operation: start traffic flowing, then call the /startTiming/10 endpoint (for a 10-second capture on one of the 2 servers). After 10 seconds, call /tps a couple of times and the timing node will return the approximate hits/second (x2).
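For reference, a hypothetical run of that procedure against the nginx balancer address from the error message above (the request count and concurrency are arbitrary assumptions):
# drive load through the balancer
boom -n 200000 -c 100 "http://192.168.99.100:9090/dispatcher?reply_to=foo" &
# start a 10-second capture on whichever server handles this request
curl "http://192.168.99.100:9090/startTiming/10"
sleep 11
# read back the approximate hits/second (x2)
curl "http://192.168.99.100:9090/tps"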
Any idea how I can get more performance out of this?

py2neo 2.x, "Local entity is not bound to a remote entity"

After updating Neo4j and py2neo to the latest versions (2.2.3 and 2.0.7 respectively), I'm facing some problems with some import scripts.
For instance, here is a bit of the code:
graph = py2neo.Graph()
graph.bind("http://localhost:7474/db/data/")
batch = py2neo.batch.PushBatch(graph)
pp.pprint(batch)
relationshipmap = {}

def create_go_term(line):
    if(line[6]=='1'):
        relationshipmap[line[0]] = line[1]
    goid = line[0]
    goacc = line[3]
    gotype = line[2]
    goname = line[1]
    term = py2neo.Node.cast({
        "id": goid, "acc": goacc, "term_type": gotype, "name": goname
    })
    term.labels.add("GO_TERM")
    pp.pprint(term)
    term.push()
    #batch.append( term )
    return True

logging.info('creating terms')
reader = csv.reader(open(opts.termfile), delimiter="\t")
iter = 0
for row in reader:
    create_go_term(row)
    iter = iter + 1
    if ( iter > 5000 ):
        # batch.push()
        iter = 0
# batch.push()
When using batch or simply push without batch, I'm getting this error:
py2neo.error.BindError: Local entity is not bound to a remote entity
What am I doing wrong?
Thanks!
I think you first have to create the node before you can add the label and use push:
term = py2neo.Node.cast({
    "id": goid, "acc": goacc, "term_type": gotype, "name": goname
})
graph.create(term)  # now the node should be bound to a remote entity
term.labels.add("GO_TERM")
term.push()
Alternatively, you can create the node with a label:
term = Node("GO_TERM", id=goid, acc=goacc, ...)
graph.create(term)
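If the goal of the original PushBatch loop was to avoid one round trip per term, one option in py2neo 2.x is to collect nodes and pass several of them to graph.create() at once, since it accepts multiple entities; a rough sketch under that assumption (the file path is a placeholder, and the column indices are taken from the question's CSV layout):
import csv
import py2neo

graph = py2neo.Graph("http://localhost:7474/db/data/")
nodes = []
for line in csv.reader(open("go_terms.tsv"), delimiter="\t"):  # placeholder path
    # build each node with its label up front, as in the answer above
    nodes.append(py2neo.Node("GO_TERM", id=line[0], acc=line[3],
                             term_type=line[2], name=line[1]))
    if len(nodes) >= 5000:
        graph.create(*nodes)  # binds all pending nodes in a single request
        nodes = []
if nodes:
    graph.create(*nodes)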
