Monitor a stream with a Supervisor in Elixir

I use the ExTwitter library to poll data from a stream like this:
stream = ExTwitter.stream_sample(receive_messages: true)

for message <- stream do
  case message do
    tweet = %ExTwitter.Model.Tweet{} ->
      IO.puts "tweet = #{tweet.text}"
    deleted_tweet = %ExTwitter.Model.DeletedTweet{} ->
      IO.puts "deleted tweet = #{deleted_tweet.status[:id]}"
    limit = %ExTwitter.Model.Limit{} ->
      IO.puts "limit = #{limit.track}"
    stall_warning = %ExTwitter.Model.StallWarning{} ->
      IO.puts "stall warning = #{stall_warning.code}"
    _ ->
      IO.inspect message
  end
end
It's working great, but now I want to monitor the stream with a Supervisor. What is the simplest way to do that?

The simplest way would be to put this code in a function in a new module, add a start_link function that simply invokes this function through spawn_link, and add that module as a worker to your Supervisor. Here's a simple example:
defmodule M do
  use Application

  def start(_type, _args) do
    import Supervisor.Spec, warn: false

    children = [
      worker(M.Streamer, []),
    ]

    opts = [strategy: :one_for_one, name: M.Supervisor]
    Supervisor.start_link(children, opts)
  end
end

defmodule M.Streamer do
  def start_link do
    {:ok, spawn_link(__MODULE__, :main, [])}
  end

  def main do
    IO.inspect(self())
    for i <- Stream.cycle([1, 2, 3]) do
      IO.puts i
      :timer.sleep(1000)
    end
  end
end
Demo:
#PID<0.85.0>
iex(1)> 1
2
3
1
2
3
1
2
Process.exit(pid(0, 85, 0), :kill)
#PID<0.88.0>
1
true
iex(2)> 2
3
1
2
3
1
2
3
Process.exit(pid(0, 88, 0), :kill)
#PID<0.90.0>
true
1
iex(3)> 2
3
1
2
3
This might be a little hard to follow since the output kept arriving while I was typing, but the PID values were printed whenever Streamer started, the Process.exit lines are the commands I entered, and true is the return value of those calls. As you can see, whenever I killed the M.Streamer process, it was restarted by the Supervisor.
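To tie this back to the question: the ExTwitter loop from the question would simply become the body of main. A minimal sketch (assuming ExTwitter is already configured with valid credentials; the case clauses are abbreviated here):

defmodule M.Streamer do
  def start_link do
    {:ok, spawn_link(__MODULE__, :main, [])}
  end

  def main do
    # The stream loop from the question, now running inside the
    # supervised process; if it crashes, the Supervisor restarts it.
    stream = ExTwitter.stream_sample(receive_messages: true)

    for message <- stream do
      case message do
        tweet = %ExTwitter.Model.Tweet{} ->
          IO.puts "tweet = #{tweet.text}"
        _ ->
          IO.inspect message
      end
    end
  end
end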

Related

How to use `otel_resource_detector` from OpenTelemetry in Elixir

I'm an Erlang/Elixir noob and I'm researching how to use otel_resource_detector in Elixir.
I managed to get this working in Erlang some time ago, but I'm struggling to get things right in Elixir.
This is what I have in Erlang:
-module(extra_metadata).

-behaviour(otel_resource_detector).

-export([get_resource/1]).

get_resource(_) ->
    Resource1 = otel_resource:create(otel_resource_app_env:parse(get_metadata("/data/extrametadata.properties")), []),
    {ok, HiddenMetadataFile} = file:read_file("/data/hiddenpath.properties"),
    Resource2 = otel_resource:create(otel_resource_app_env:parse(get_metadata(HiddenMetadataFile)), []),
    otel_resource:merge(Resource1, Resource2).

get_metadata(FileName) ->
    try
        {ok, MetadataFile} = file:read_file(FileName),
        Lines = binary:split(MetadataFile, <<"\n">>, [trim, global]),
        make_tuples(Lines, [])
    catch
        _:_ -> "Extra Metadata not found"
    end.

make_tuples([Line|Lines], Acc) ->
    [Key, Value] = binary:split(Line, <<"=">>),
    make_tuples(Lines, [{Key, Value}|Acc]);
make_tuples([], Acc) -> Acc.
Full Erlang app here: https://github.com/julianocosta89/erlang_otel_hello_server/tree/main
I'm trying to make use of the otel_resource_detector from OpenTelemetry:
https://github.com/open-telemetry/opentelemetry-erlang/blob/37f3cecd9ad2a7b8f3b94c89118585991f0023b1/apps/opentelemetry/src/otel_resource_detector.erl
How would I use that in Elixir?
One can easily call Erlang modules from Elixir. The first module would look like:
defmodule ExtraMetadata do
  @behaviour :otel_resource_detector

  def get_resource(_) do
    resource1 =
      :otel_resource.create(
        :otel_resource_app_env.parse(
          get_metadata("/data/extrametadata.properties")), [])

    {:ok, hidden_metadata_file} =
      File.read("/data/hiddenpath.properties")

    resource2 =
      :otel_resource.create(
        :otel_resource_app_env.parse(
          get_metadata(hidden_metadata_file)), [])

    :otel_resource.merge(resource1, resource2)
  end

  defp get_metadata(file_name) do
    try do
      {:ok, metadata_file} = File.read(file_name)
      lines = :binary.split(metadata_file, <<"\n">>, [:trim, :global])
      make_tuples(lines)
    catch
      _, _ -> "Extra Metadata not found"
    end
  end

  # Enum.map/2 would probably be more idiomatic
  defp make_tuples(lines, acc \\ [])

  defp make_tuples([line | lines], acc) do
    [key, value] = :binary.split(line, <<"=">>)
    make_tuples(lines, [{key, value} | acc])
  end

  defp make_tuples([], acc), do: acc
end
NB: I obviously did not check the code above; some glitches might need additional handling.
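As the comment in the code above hints, make_tuples could be written more idiomatically with Enum.map/2. A minimal sketch (note that, unlike the accumulator version, this preserves the original line order):

  # Idiomatic alternative to the recursive make_tuples/2 above:
  # map each "key=value" line to a {key, value} tuple.
  defp make_tuples(lines) do
    Enum.map(lines, fn line ->
      [key, value] = :binary.split(line, <<"=">>)
      {key, value}
    end)
  end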
Here is a working snippet:
defmodule ExtraMetadata do
  @behaviour :otel_resource_detector

  def get_resource(_) do
    lines = read_file("/data/extrametadata.properties") |> unwrap_lines
    file_path = read_file("/data/hiddenpath.properties") |> unwrap_lines
    lines2 = read_file(file_path) |> unwrap_lines
    attributes = get_attributes(Enum.concat(lines, lines2))
    :otel_resource.create(attributes)
  end

  defp unwrap_lines({:ok, lines}), do: lines
  defp unwrap_lines({:error, _}), do: []

  defp read_file(file_name) do
    try do
      {:ok, String.split(File.read!(file_name), "\n")}
    rescue
      File.Error ->
        {:error, "File does not exist, safe to continue"}
    end
  end

  defp get_attributes(lines) do
    # Transform each string into a {key, value} tuple
    Enum.map(lines, fn line ->
      if String.length(line) > 0 do
        [key, value] = String.split(line, "=")
        {key, value}
      else
        {:error, "Empty string"}
      end
    end)
  end
end
I've also pushed the whole project here: https://github.com/julianocosta89/elixir-hello.
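For completeness: defining the detector module is only half of the job; the SDK also has to be told to run it. Assuming the resource_detectors application-environment key of the opentelemetry app (check the docs of your opentelemetry-erlang version), registration might look like this:

# config/config.exs -- assumes the :resource_detectors key of the
# :opentelemetry application; verify against your SDK version.
import Config

config :opentelemetry,
  resource_detectors: [
    :otel_resource_env_var,  # built-in: OTEL_RESOURCE_ATTRIBUTES env var
    :otel_resource_app_env,  # built-in: application environment
    ExtraMetadata            # the custom detector defined above
  ]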

Tarantool fiber behavior with fiber.yield() and fiber.testcancel()

I ran into unexpected behavior while building a Tarantool app based on fibers.
A simple reproducer of my code looks like this:
local log = require('log')
local fiber = require('fiber')

box.cfg{}

local func = function()
    for i = 1, 100000 do
        if pcall(fiber.testcancel) ~= true then
            return 1
        end
        fiber.yield()
    end
    return 0
end

local wrapfunc = function()
    local ok, resp = pcall(func)
    log.info(ok)
    log.info(resp)
end

for _ = 1, 100 do
    local myfiber = fiber.create(wrapfunc)
    fiber.sleep(0.02)
    fiber.kill(myfiber)
end
and it prints false, fiber is cancelled to the log. Moreover, if I use the following func:
local func = function()
    for i = 1, 100000 do
        if pcall(fiber.testcancel) ~= true then
            return 1
        end
        pcall(fiber.yield)
    end
    return 0
end
it prints true, 1 to the log, and if I use
local func = function()
    for i = 1, 100000 do
        if pcall(fiber.testcancel) ~= true then
            return 1
        end
        if pcall(fiber.yield) ~= true then
            return 2
        end
    end
    return 0
end
it prints true, 2 to the log.
I expected that after running myfiber yields, if control returns to the outer fiber and it calls fiber.kill(myfiber), then the next time control returns to the cancelled myfiber we would be at the end of the loop iteration, and on the next iteration the code would successfully return 1. However, func ends by throwing the error fiber is cancelled, not by returning. So how does the life cycle of a yielding fiber really work?
Actually, there is no unexpected behaviour here; I believe it is mostly a documentation issue. Let me explain. I've simplified your example a bit:
#!/usr/bin/env tarantool
local fiber = require('fiber')

local f1 = function() fiber.yield() end
local f2 = function() pcall(fiber.yield) end

local func = function(fn)
    fn()
    if not pcall(fiber.testcancel) then
        return 'fiber.testcancel() failed'
    end
end

local fiber1 = fiber.create(function() print(pcall(func, f1)) end)
fiber.kill(fiber1)

local fiber2 = fiber.create(function() print(pcall(func, f2)) end)
fiber.kill(fiber2)
The output would be:
false fiber is cancelled
true fiber.testcancel() failed
When you call fiber.kill, the fiber.yield() or fiber.sleep() inside the fiber simply raises an error, so your fiber never reaches fiber.testcancel and just dies. When you do pcall(fiber.yield), you basically suppress this error and proceed. Then fiber.testcancel checks its fiber's status and re-raises the exception. But this is a contrived example.
Now, with bigger chunks of code, where lots of function invocations are involved, you usually want to catch those errors during a yield, do some finalisation work, and then call fiber.testcancel() to propagate the error upwards (imagine multiple checks of this kind in different parts of a big stack trace). I believe that is the basic use case fiber.testcancel was introduced for, leaving aside discussions of whether its design is usable or not.
P.S. And yes, the fact that such yield calls can occasionally raise exceptions is not documented; at least I could not find anything on the fiber page.

How can I handle duplicate items in an Aerospike script

I have a script that works properly, but I have to update it. The script currently adds items without checking whether they already exist.
function put_page(rec, id, val)
    local l = rec['h']
    if l == nil then
        l = list()
        rec['id'] = id
    end
    list.append(l, val)
    rec['h'] = l
    if aerospike:exists(rec) then aerospike:update(rec) else aerospike:create(rec) end
end
I tried iterating over the list with for value in list.iterator(l) and appending the item only if value ~= val, but it didn't work.
The id in the function is a Solr document_id; val is a users_id. An example object I get from Aerospike: (('contextChannel', 'ContextChannel', None, bytearray(b'E\xfb\xa3\xd0\r\xd6\r\J#f\xa8\xf6>y!\xd18=\x9b')), {'ttl': 2592000, 'gen': 8}, {'id': 'ALKSD4EW', 'h': []})
UPDATE
I tried different variants, and this worked:
function put_page(rec, id, val)
    local l = rec['h']
    local count = 0
    if l == nil then
        l = list()
        rec['id'] = id
    end
    for value in list.iterator(l) do
        if (value ~= val) then count = count + 1 end
    end
    if (list.size(l) == count) then list.append(l, val) end
    rec['h'] = l
    if aerospike:exists(rec) then aerospike:update(rec) else aerospike:create(rec) end
end
Don't create a UDF for something that exists as a List API operation. UDFs will not perform as well, nor scale as well.
You can do this without a UDF. Here's an example of doing the same thing using the Python client.
import aerospike
from aerospike_helpers.operations import list_operations as lh
from aerospike_helpers.operations import operations as oh

# Only add val if it is not already in the list; LIST_WRITE_NO_FAIL
# turns the "already exists" error into a silent no-op.
list_policy = {
    "list_order": aerospike.LIST_UNORDERED,
    "write_flags": (aerospike.LIST_WRITE_ADD_UNIQUE |
                    aerospike.LIST_WRITE_NO_FAIL)
}

ops = [
    oh.write('id', id),
    lh.list_append('h', val, list_policy)
]
client.operate(key, ops)
I have an example of a similar thing at rbotzer/aerospike-cdt-examples.

Supervision tree failing to start

I'm trying to implement something like what is described in this answer, but I'm getting errors like the one included below when I compile the application.
** (Mix) Could not start application workers: Workers.Application.start(:normal, []) returned an error: shutdown: failed to start child: {Workers.UrlSupervisor, 2}
** (EXIT) already started: #PID<0.1034.0>
I'm not sure if I am inherently doing something I'm not allowed to here, or I've just made a little mistake.
For some context here are the supervisors:
defmodule Workers.Application do
  # See http://elixir-lang.org/docs/stable/elixir/Application.html
  # for more information on OTP Applications
  @moduledoc false

  use Application

  def start(_type, _args) do
    import Supervisor.Spec, warn: false

    url_workers =
      1..100
      |> Enum.map(fn i ->
        supervisor(Workers.UrlSupervisor, [i], [id: {Workers.UrlSupervisor, i}, restart: :temporary])
      end)

    domain_workers =
      1..100
      |> Enum.map(fn i ->
        supervisor(Workers.DomainSupervisor, [i], [id: {Workers.DomainSupervisor, i}, restart: :temporary])
      end)

    opts = [strategy: :one_for_one, name: Workers.Supervisor]
    Supervisor.start_link(url_workers ++ domain_workers, opts)
  end
end

defmodule Workers.UrlSupervisor do
  def start_link(id) do
    import Supervisor.Spec, warn: false

    children = [worker(Task, [&Workers.Url.worker/0], [id: {Workers.Url, id}, restart: :permanent])]
    opts = [strategy: :one_for_one, name: Workers.UrlSupervisor]
    Supervisor.start_link(children, opts)
  end
end

defmodule Workers.DomainSupervisor do
  def start_link(id) do
    import Supervisor.Spec, warn: false

    children = [worker(Task, [&Workers.Domain.worker/0], [id: {Workers.Domain, id}, restart: :permanent])]
    opts = [strategy: :one_for_one, name: Workers.DomainSupervisor]
    Supervisor.start_link(children, opts)
  end
end
And here is one of the workers (they look largely the same).
defmodule Workers.Domain do
  def worker do
    case Store.Domains.pop do
      :empty ->
        IO.puts "[Domain] none found, waiting..."
        :timer.sleep(1000)
      {crawl_id, domain} ->
        IO.puts "[Domains] found a domain to check: #{domain}"
        case Core.check_domain(domain) do
          :error ->
            Utils.insert(crawl_id, domain, false)
          :registered ->
            Utils.insert(crawl_id, domain, false)
          :available ->
            Utils.insert(crawl_id, domain, true)
        end
    end
    worker()
  end
end
In your Workers.Application, when starting the supervisors you're providing unique ids, but they should also have unique names.
Try adding another keyword, something like name: :"url_supervisor_#{i}":
def start(_type, _args) do
  import Supervisor.Spec, warn: false

  url_workers =
    1..100
    |> Enum.map(fn i ->
      supervisor(Workers.UrlSupervisor, [i],
        [id: {Workers.UrlSupervisor, i},
         name: :"url_supervisor_#{i}", # Name added here
         restart: :temporary])
    end)

  domain_workers =
    1..100
    |> Enum.map(fn i ->
      supervisor(Workers.DomainSupervisor, [i],
        [id: {Workers.DomainSupervisor, i},
         name: :"domain_supervisor_#{i}", # Name added here
         restart: :temporary])
    end)

  opts = [strategy: :one_for_one, name: Workers.Supervisor]
  Supervisor.start_link(url_workers ++ domain_workers, opts)
end
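Note that Workers.UrlSupervisor.start_link itself also passes a fixed name: Workers.UrlSupervisor to its inner Supervisor.start_link, which collides as soon as a second instance starts (registering an already-taken name returns an already started error). That name needs to be made unique, or dropped, as well. A minimal sketch along the same lines (the atom pattern here is just an illustration):

defmodule Workers.UrlSupervisor do
  def start_link(id) do
    import Supervisor.Spec, warn: false

    children = [worker(Task, [&Workers.Url.worker/0], [id: {Workers.Url, id}, restart: :permanent])]

    # Derive a unique registered name from the id, or omit :name
    # entirely so each instance runs unregistered.
    opts = [strategy: :one_for_one, name: :"url_supervisor_inner_#{id}"]
    Supervisor.start_link(children, opts)
  end
end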

How to make an HTTP request in a thread and keep the call order?

I want to write a function which calls a remote service every second. To do this, I have something like this:
stop = false
text = ""

while stop == false
  r = RestClient.post 'http://example.com'
  text += r.to_str
  sleep 1
  # after some processing, stop will be set to true
end
The problem is that the program blocks until the HTTP request is done, and I don't want that. I could put this code in a subprocess, but I want to keep the results in call order. For example, I can have these requests:
time | answer
--------------
10 | Happy
100 | New
10 | Year
The second request takes longer than the third, so with threads I will get the third result before the second, and the value of the variable text will be HappyYearNew when I want HappyNewYear.
Is there a way to have multiple processes and keep the original order? It's a very small program; I don't want to have to install a server like Redis if I can avoid it.
Using hash
Since Ruby 1.9, hash key order (insertion order) is guaranteed. A simple solution here would be to take advantage of that by putting your requests in a hash and storing each result in the hash element under its key:
requests = {
  foo: [ 'a', 1 ],
  bar: [ 'b', 5 ],
  foobar: [ 'c', 2 ]
}

requests.each do |name, config|
  Thread.new( name, config ) do |name, config|
    sleep config[1]
    requests[ name ] = config[0]
  end
end

sleep 6

requests.each do |name, result|
  puts "#{name} : #{result}"
end
Produces:
foo : a
bar : b
foobar : c
Thus, to match your provided code:
stop, i, text, requests = false, 0, '', {}

until stop
  i += 1
  requests[ i ] = nil
  Thread.new( i ) do |key|
    r = RestClient.post 'http://example.com'
    requests[ key ] = r.to_str
    sleep 1
    # after some processing, stop will be set to true
  end
end

# you will have to join the threads here
text = requests.values.join
Using array
If the last example works for you, you could simplify it even further using an array. Array order is of course guaranteed too, and you can take advantage of the dynamically sized nature of Ruby arrays:
a = []
a[5] = 1
p a
=> [nil, nil, nil, nil, nil, 1]
So, the previous example can be rewritten:
stop, i, text, requests = false, 0, '', []

until stop
  i += 1
  Thread.new( i ) do |key|
    r = RestClient.post 'http://example.com'
    requests[ key ] = r.to_str
    sleep 1
    # after some processing, stop will be set to true
  end
end

# you will have to join the threads here
text = requests.join
Here's a pretty simple solution with threads. I keep results and rmutex on the current thread (Thread.current); you could make them globals, instance variables, or a lot of other things:
stop = false
Thread.current[:results] = {}
Thread.current[:rmutex] = Mutex.new
counter = 0

while !stop
  Thread.new(counter, Thread.current) do |idex, parent|
    r = RestClient.post 'http://example.com'
    parent[:rmutex].lock
    parent[:results][idex] = r.to_str
    parent[:rmutex].unlock
  end
  counter += 1 # advance the index so each request keeps its call order
  sleep 1
end

text = Thread.current[:results].to_a.sort_by { |o| o[0] }.map { |o| o[1] }.join
This works by recording the index at which each thread's operation was started, storing each result under that index, and joining everything after sorting by index at the end.
