Memory corruption with fortran90 deallocate statement - memory

I've built a minimal example of distributing fortran derived types using MPI_PACK, MPI_SEND, MPI_RECV, and also exchanging their boundaries to test MPI_SENDRECV for MPI_PACKED derived types.
The code works, but it shows some strange behavior that I attribute to memory corruption when I put the deallocate statement in the middle of the code, while it works fine with the deallocate statement at the end of the code. The deallocate statements are marked with (*) at the left side in the main program.
The flow of the code is,
1) MPI_PACK the whole derived type.
2) Distribute with MPI_SEND, MPI_RECV, and MPI_UNPACK recovering the derived type
structure.
3) MPI_PACK the boundaries of the distributed local derived type.
4) Exchange boundaries between adjacent processors using MPI_SENDRECV
I've put the exactly same code that I tested, so they would compile well with like mpif90 mod_data_structure.f90 main.f90 -o main, and the problem would be totally reproducible. The results below are the output from mpirun -np 2 main.
module mod_data_structure
  ! Derived types used by the MPI pack/unpack example.
  implicit none

  ! Working precision of the real components (kept private so the module
  ! exports exactly the same names as before).
  integer, parameter, private :: dp = selected_real_kind(15,307)

  ! A single cell: two coordinates, eight values and one integer flag.
  type type_cell
    real(dp), dimension(2) :: xc
    real(dp), dimension(8) :: values_c
    integer                :: flag_boundary
  end type type_cell

  ! Fixed-size 13 x 13 grid of cells.
  type type_cell_list
    type(type_cell), dimension(13,13) :: cell
  end type type_cell_list

  ! Grid of cells whose extent is chosen at run time.
  type type_cell_list_local
    type(type_cell), dimension(:,:), allocatable :: cell
  end type type_cell_list_local
end module mod_data_structure
program main
  ! Minimal example of moving derived-type data with MPI_PACKED buffers:
  !   1) rank 0 packs column blocks of the global grid A,
  !   2) the blocks are distributed with MPI_SEND / MPI_RECV and unpacked into A_local,
  !   3) every rank packs its nbc leftmost and nbc rightmost columns,
  !   4) neighbouring ranks swap those columns with MPI_SENDRECV.
  !
  ! Changes relative to the first version of this program:
  !  * A rank without a left (right) neighbour uses MPI_PROC_NULL. A receive from
  !    MPI_PROC_NULL leaves the receive buffer untouched, so buffer_lg (buffer_rg)
  !    is uninitialised on that rank and must not be unpacked. Unpacking it copied
  !    whatever the heap contained into A_local; that is why the result depended
  !    on where deallocate(buffer) was placed.
  !  * The internal writes use the standard i0 edit descriptor (a width-less i is
  !    a compiler extension).
  !  * (nymax+2*nbc)/n_proc is an integer division, so the former ceiling() had no
  !    effect; the column split is now written with integers only (same result).
  !  * The run is aborted when a rank would own fewer than nbc columns.
  use MPI
  use mod_data_structure
  implicit none
  integer,parameter :: nxmax = 9, nymax = 9, nbc = 2
  ! Grid extent including nbc boundary layers on both sides; this has to match
  ! the fixed 13 x 13 shape of type_cell_list%cell.
  integer,parameter :: nx_tot = nxmax + 2*nbc, ny_tot = nymax + 2*nbc
  integer :: i, j, k, ii, jj
  type(type_cell_list) :: A
  type(type_cell_list_local) :: A_local
  type(type_cell) :: acell
  character(len=20) :: write_fmt
  ! MPI variables
  integer :: n_proc, my_id, ierr, source, dest
  integer :: tag
  integer :: status ( MPI_STATUS_SIZE ), &
             status_l ( MPI_STATUS_SIZE ), &
             status_r ( MPI_STATUS_SIZE )
  integer,allocatable :: local_size(:), local_start(:)
  character,allocatable :: buffer(:), buffer_l(:), buffer_lg(:), buffer_r(:), buffer_rg(:)
  integer :: bufsize, bufsize_gc
  integer :: base_cols   ! columns owned by every rank except the last one
  integer :: cell_size   ! upper bound of the packed size of one type_cell in bytes
  integer :: left_proc, right_proc
  integer :: DBL_SIZE, INT_SIZE, position_local
  integer :: position_l, position_r
  integer,allocatable :: position(:)

  call MPI_INIT ( ierr )
  call MPI_COMM_RANK ( MPI_COMM_WORLD, my_id, ierr )
  call MPI_COMM_SIZE ( MPI_COMM_WORLD, n_proc, ierr )
  call MPI_PACK_SIZE(1,MPI_DOUBLE_PRECISION,MPI_COMM_WORLD,DBL_SIZE,ierr)
  call MPI_PACK_SIZE(1,MPI_INTEGER ,MPI_COMM_WORLD,INT_SIZE,ierr)
  cell_size = (8+2)*DBL_SIZE + (1)*INT_SIZE

  ! Construct the derived data types (rank 0 owns the global grid)
  if ( my_id .eq. 0 ) then
    do j = 1,ny_tot
      do i = 1,nx_tot
        A%cell(i,j)%flag_boundary = 0
        A%cell(i,j)%values_c = 0.d0
        A%cell(i,j)%xc = 0.d0
      enddo
    enddo
    ! Interior cells get recognisable values: flag = 10*row + column
    do j = 1+nbc,nymax+nbc
      jj = j - nbc
      do i = 1+nbc,nxmax+nbc
        ii = i - nbc
        A%cell(i,j)%flag_boundary = 10*ii + jj
        do k = 1,8
          A%cell(i,j)%values_c(k) = 1.d1*ii + jj + 0.1d0*k
        enddo
        do k = 1,2
          A%cell(i,j)%xc(k) = 1.d1*ii + jj + 0.1d0*k
        enddo
      enddo
    enddo
    write(write_fmt, '(a,i0,a)') '(',ny_tot,'i3)'
    write(*,*) 'my_id ', my_id
    write(*,*) 'Total flag_boundary'
    do i = 1,nx_tot
      write(*,write_fmt) A%cell(i,:)%flag_boundary
    enddo
    write(*,*) ' '
  endif

  !*** Test MPI_PACK and MPI_SEND / MPI_RECV
  ! Prepare for the distribution: split the ny_tot columns over the ranks,
  ! the last rank takes the remainder.
  allocate ( local_size(n_proc), local_start(n_proc), position(n_proc) )
  base_cols = ny_tot / n_proc
  local_size(1:n_proc-1) = base_cols
  local_size(n_proc) = ny_tot - (n_proc - 1)*base_cols
  ! The boundary exchange below reads nbc columns from each side of A_local.
  if ( minval(local_size) .lt. nbc ) then
    if ( my_id .eq. 0 ) write(*,*) 'Too many processes: every rank needs at least ', nbc, ' columns'
    call MPI_ABORT ( MPI_COMM_WORLD, 1, ierr )
  endif
  allocate ( A_local%cell(nx_tot,local_size(my_id+1)) )
  ! 'local_start': first global column of every rank
  local_start(1) = 1
  do i = 2,n_proc
    local_start(i) = local_start(i-1) + local_size(i-1)
  enddo
  ! allocate 'buffer'
  bufsize = maxval(local_size) * nx_tot * cell_size
  allocate ( buffer(bufsize) )
  position = 0

  if ( my_id .eq. 0 ) then
    ! Assign 'A_local' for 'my_id .eq. 0' itself
    A_local%cell(:,:) = A%cell(1:nx_tot,1:local_size(1))
    do k = 2, n_proc ! w/o 'my_id .eq. 0' itself
      do j = local_start(k), local_start(k) + local_size(k) - 1
        do i = 1,nx_tot
          acell = A%cell(i,j)
          call MPI_PACK(acell%xc, 2, MPI_DOUBLE_PRECISION, &
                        buffer, bufsize, position(k), MPI_COMM_WORLD, ierr)
          call MPI_PACK(acell%values_c, 8, MPI_DOUBLE_PRECISION, &
                        buffer, bufsize, position(k), MPI_COMM_WORLD, ierr)
          call MPI_PACK(acell%flag_boundary, 1, MPI_INTEGER, &
                        buffer, bufsize, position(k), MPI_COMM_WORLD, ierr)
        enddo
      enddo
      dest = k-1
      tag = k-1
      ! Only the bytes that were actually packed are sent.
      call MPI_SEND (buffer, position(k), MPI_PACKED, dest, tag, MPI_COMM_WORLD, ierr )
    enddo
  else ! ( my_id .ne. 0 ) then
    source = 0
    tag = my_id
    call MPI_RECV (buffer, bufsize, MPI_PACKED, source, tag, MPI_COMM_WORLD, status, ierr )
    position_local = 0
    do j = 1, local_size(my_id+1)
      do i = 1, nx_tot
        call MPI_UNPACK (buffer, bufsize, position_local, &
                         acell%xc, 2, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD, ierr)
        call MPI_UNPACK (buffer, bufsize, position_local, &
                         acell%values_c, 8, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD, ierr)
        call MPI_UNPACK (buffer, bufsize, position_local, &
                         acell%flag_boundary, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr)
        A_local%cell(i,j) = acell
      enddo
    enddo
  endif
  ! (*) 'buffer' is not used any more, so it can be released here.
  deallocate ( buffer )

  do k = 1,n_proc
    if ( my_id .eq. (k-1) ) then
      write(write_fmt, '(a,i0,a)') '(',local_size(my_id+1),'i3)'
      write(*,*) ' Before MPI_SENDRECV'
      write(*,*) 'my_id ', my_id
      write(*,*) 'cols ', local_size(my_id+1)
      do i = 1,nx_tot
        write(*,write_fmt) A_local%cell(i,:)%flag_boundary
      enddo
      write(*,*) ' '
    endif
    !call MPI_BARRIER ( MPI_COMM_WORLD, ierr )
  enddo

  ! Test MPI_SENDRECV
  bufsize_gc = nbc * nx_tot * cell_size
  allocate ( buffer_l(bufsize_gc), buffer_lg(bufsize_gc), buffer_r(bufsize_gc), buffer_rg(bufsize_gc) )
  ! 'left_proc'
  if ( my_id .eq. 0 ) then
    left_proc = MPI_PROC_NULL
  else ! ( my_id .ne. 0 ) then
    left_proc = my_id - 1
  endif
  ! 'right_proc'
  if ( my_id .eq. n_proc-1 ) then
    right_proc = MPI_PROC_NULL
  else ! ( my_id .ne. n_proc - 1 )
    right_proc = my_id + 1
  endif

  ! pack 'buffer_l' (first nbc columns) & 'buffer_r' (last nbc columns)
  position_l = 0
  do j = 1,nbc
    do i = 1,nx_tot
      acell = A_local%cell(i,j)
      call MPI_PACK(acell%xc, 2, MPI_DOUBLE_PRECISION, &
                    buffer_l, bufsize_gc, position_l, MPI_COMM_WORLD, ierr)
      call MPI_PACK(acell%values_c, 8, MPI_DOUBLE_PRECISION, &
                    buffer_l, bufsize_gc, position_l, MPI_COMM_WORLD, ierr)
      call MPI_PACK(acell%flag_boundary, 1, MPI_INTEGER, &
                    buffer_l, bufsize_gc, position_l, MPI_COMM_WORLD, ierr)
    enddo
  enddo
  position_r = 0
  do j = local_size(my_id+1)-nbc+1, local_size(my_id+1)
    do i = 1,nx_tot
      acell = A_local%cell(i,j)
      call MPI_PACK(acell%xc, 2, MPI_DOUBLE_PRECISION, &
                    buffer_r, bufsize_gc, position_r, MPI_COMM_WORLD, ierr)
      call MPI_PACK(acell%values_c, 8, MPI_DOUBLE_PRECISION, &
                    buffer_r, bufsize_gc, position_r, MPI_COMM_WORLD, ierr)
      call MPI_PACK(acell%flag_boundary, 1, MPI_INTEGER, &
                    buffer_r, bufsize_gc, position_r, MPI_COMM_WORLD, ierr)
    enddo
  enddo

  ! Send the left columns to the left neighbour, receive from the right neighbour.
  call MPI_SENDRECV (buffer_l, position_l, MPI_PACKED, left_proc, 0, &
                     buffer_rg, bufsize_gc, MPI_PACKED, right_proc, 0, &
                     MPI_COMM_WORLD, status_l, ierr )
  ! Send the right columns to the right neighbour, receive from the left neighbour.
  call MPI_SENDRECV (buffer_r, position_r, MPI_PACKED, right_proc, 0, &
                     buffer_lg, bufsize_gc, MPI_PACKED, left_proc, 0, &
                     MPI_COMM_WORLD, status_r, ierr )

  ! fill left boundary -- only when something was actually received
  if ( left_proc .ne. MPI_PROC_NULL ) then
    position_l = 0
    do j = 1,nbc
      do i = 1,nx_tot
        call MPI_UNPACK (buffer_lg, bufsize_gc, position_l, &
                         acell%xc, 2, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD, ierr)
        call MPI_UNPACK (buffer_lg, bufsize_gc, position_l, &
                         acell%values_c, 8, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD, ierr)
        call MPI_UNPACK (buffer_lg, bufsize_gc, position_l, &
                         acell%flag_boundary, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr)
        A_local%cell(i,j) = acell
      enddo
    enddo
  endif
  ! fill right boundary -- only when something was actually received
  if ( right_proc .ne. MPI_PROC_NULL ) then
    position_r = 0
    do j = local_size(my_id+1)-nbc+1, local_size(my_id+1)
      do i = 1,nx_tot
        call MPI_UNPACK (buffer_rg, bufsize_gc, position_r, &
                         acell%xc, 2, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD, ierr)
        call MPI_UNPACK (buffer_rg, bufsize_gc, position_r, &
                         acell%values_c, 8, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD, ierr)
        call MPI_UNPACK (buffer_rg, bufsize_gc, position_r, &
                         acell%flag_boundary, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr)
        A_local%cell(i,j) = acell
      enddo
    enddo
  endif

  do k = 1,n_proc
    if ( my_id .eq. (k-1) ) then
      write(write_fmt, '(a,i0,a)') '(',local_size(my_id+1),'i3)'
      write(*,*) ' After MPI_SENDRECV'
      write(*,*) 'my_id ', my_id
      write(*,*) 'cols ', local_size(my_id+1)
      do i = 1,nx_tot
        write(*,write_fmt) A_local%cell(i,:)%flag_boundary
      enddo
      write(*,*) ' '
    endif
    !call MPI_BARRIER ( MPI_COMM_WORLD, ierr )
  enddo

  ! (*) the other position originally tried for deallocate ( buffer ); with the
  ! guarded unpacking above the result no longer depends on the position.
  deallocate ( buffer_l, buffer_lg, buffer_r, buffer_rg )
  call MPI_FINALIZE ( ierr )
end program
With deallocate(buffer) at the end of the code, a part of the output looks like below, which worked as I intended.
After MPI_SENDRECV
my_id 0
cols 6
0 0 0 0 0 0
0 0 0 0 0 0
0 0 11 12 15 16
0 0 21 22 25 26
0 0 31 32 35 36
0 0 41 42 45 46
0 0 51 52 55 56
0 0 61 62 65 66
0 0 71 72 75 76
0 0 81 82 85 86
0 0 91 92 95 96
0 0 0 0 0 0
0 0 0 0 0 0
But if I locate deallocate(buffer) in the middle of the code, the same part of the output looks like this.
After MPI_SENDRECV
my_id 0
cols 6
0 0 0 0 0 0
****** 0 0 0 0
****** 11 12 15 16
****** 21 22 25 26
****** 31 32 35 36
****** 41 42 45 46
****** 51 52 55 56
****** 61 62 65 66
****** 71 72 75 76
****** 81 82 85 86
0 0 91 92 95 96
0 0 0 0 0 0
0 0 0 0 0 0
And if I change write format to show more digits of integer, they are 10 digits of integer which goes like 1079533568.
I've seen this kind of problem in "Segmentation Fault using MPI_Sendrecv with a 2D contiguous array", but there was no clear answer as to why deallocating, in the middle of the code, a variable that is not used in the rest of the code causes such a problem.
Where does this problem stem from?

I am not sure if I'm answering this question fairly, but my practical experience with derived types is that the safest way to handle them with different MPI implementations is to not use any advanced MPI constructs and keep all derived type work on the Fortran side.
For example, I would write pure functions to pack and expand your datatypes:
! Number of real64 slots used to serialise one type_cell:
! 8 (values_c) + 2 (xc) + 1 (flag_boundary stored as a real).
integer, parameter :: TYPE_CELL_BUFSIZE = 11
! Flattens one cell into a real64 vector laid out as [values_c, xc, flag_boundary].
pure function type_cell_pack(this) result(buffer)
  class(type_cell), intent(in) :: this
  real(real64) :: buffer(TYPE_CELL_BUFSIZE)
  ! The integer flag travels as a real so that a single MPI datatype is enough.
  buffer = [real(real64) :: this%values_c, this%xc, real(this%flag_boundary,real64)]
end function type_cell_pack
! Rebuilds a cell from a real64 vector laid out as [values_c(1:8), xc(1:2), flag_boundary].
pure type(type_cell) function type_cell_unpack(buffer) result(this)
  real(real64), intent(in) :: buffer(TYPE_CELL_BUFSIZE)
  ! The flag was stored as a real; nint rounds it back to the nearest integer.
  this = type_cell(xc = buffer(9:10), &
                   values_c = buffer(1:8), &
                   flag_boundary = nint(buffer(11)))
end function type_cell_unpack
And then write two wrappers for MPI comms using MPI_send and MPI_recv only, like this for a scalar quantity:
! Transfers one type_cell from rank fromCpu to rank toCpu through a plain
! real64 buffer; ranks that are neither sender nor receiver do nothing.
!   this     - cell to send (on fromCpu) / cell that is overwritten (on toCpu)
!   fromCpu  - rank that sends
!   toCpu    - rank that receives
!   mpiWorld - presumably the communicator to use; the elided call arguments do not show it
! NOTE(review): cpuid is not declared in this snippet; presumably it holds the
! rank of the calling process - confirm where it comes from.
subroutine type_cell_send_scalar(this,fromCpu,toCpu,mpiWorld)
type(type_cell), intent(inout) :: this
integer, intent(in) :: fromCpu,toCpu,mpiWorld
! Flat representation of one cell
real(real64) :: mpibuf(TYPE_CELL_BUFSIZE)
if (cpuid==fromCpu) then
! Sender: flatten the cell, then send the buffer (remaining arguments elided)
mpibuf = type_cell_pack(this)
call mpi_send(...,mpibuf,...,MPI_DOUBLE_PRECISION,...)
elseif (cpuid==toCpu) then
! Receiver: receive the buffer (remaining arguments elided), then rebuild the cell
call mpi_recv(...,mpibuf,...,MPI_DOUBLE_PRECISION,...)
this = type_cell_unpack(mpibuf)
endif
end subroutine type_cell_send_scalar
And the following for an array quantity:
! Transfers a rank-1 array of type_cell from rank fromCpu to rank toCpu in a
! single message; ranks that are neither sender nor receiver do nothing.
!   these    - cells to send (on fromCpu) / cells that are overwritten (on toCpu)
!   fromCpu  - rank that sends
!   toCpu    - rank that receives
!   mpiWorld - presumably the communicator to use; the elided call arguments do not show it
! NOTE(review): cpuid is not declared in this snippet; presumably it holds the
! rank of the calling process - confirm where it comes from.
subroutine type_cell_send_array(these,fromCpu,toCpu,mpiWorld)
type(type_cell), intent(inout) :: these(:)
integer, intent(in) :: fromCpu,toCpu,mpiWorld
integer :: i,ncell,bufsize
! Automatic array: TYPE_CELL_BUFSIZE consecutive slots per cell
real(real64) :: mpibuf(TYPE_CELL_BUFSIZE*size(these))
ncell = size(these)
bufsize = ncell*TYPE_CELL_BUFSIZE
if (cpuid==fromCpu) then
! Sender: cell i occupies slots (i-1)*TYPE_CELL_BUFSIZE+1 .. i*TYPE_CELL_BUFSIZE
do i=1,ncell
mpibuf((i-1)*TYPE_CELL_BUFSIZE+1:i*TYPE_CELL_BUFSIZE) = type_cell_pack(these(i))
end do
! NOTE(review): MPI_SEND and MPI_RECV take the buffer first and the element
! count second; the placeholder calls below list bufsize before mpibuf - swap
! them when filling in the elided arguments.
call mpi_send(bufsize,mpibuf,...,MPI_DOUBLE_PRECISION,...)
elseif (cpuid==toCpu) then
call mpi_recv(bufsize,mpibuf,...,MPI_DOUBLE_PRECISION,...)
! Receiver: rebuild every cell from its slice of the buffer
do i=1,ncell
these(i) = type_cell_unpack(mpibuf((i-1)*TYPE_CELL_BUFSIZE+1:i*TYPE_CELL_BUFSIZE))
end do
endif
end subroutine type_cell_send_array

Related

Creating a crc8 function with lua

Is there a simple algorithm to create a crc8 checksum from a table in lua?
Polynomial should be x^8+x^5+x^4+1 (0x31)
This algorithm will be used to check the UID of the DS28CM00 UID-chip.
Here you can find a table returned by the chip (LS-byte last) :
table = {112,232,9,80,1,0,0}
Thanks for any help
For Lua 5.3+
-- CRC-8 of the byte values in array `t`, processed least significant bit first.
-- 0x8C is the bit-reversed form of the polynomial 0x31 (x^8 + x^5 + x^4 + 1).
local function crc8(t)
  local crc = 0
  for _, byte in ipairs(t) do
    for shift = 0, 7 do
      -- feedback is 1 when the lowest CRC bit differs from the current data bit
      local feedback = (crc ~ (byte >> shift)) & 1
      crc = (crc >> 1) ~ (feedback * 0x8C)
    end
  end
  return crc
end
print(crc8{112, 232, 9, 80, 1, 0, 0}) --> 219
print(crc8{2, 0x1C, 0xB8, 1, 0, 0, 0}) --> 0xA2 as in example from AN-27
For Lua 5.2-
-- CRC-8 of the byte values in array `t` using arithmetic only (no bitwise operators).
-- Bits are processed least significant first; the feedback value is 0x8C (bits 7, 3 and 2).
local function crc8(t)
  local crc = 0
  for _, value in ipairs(t) do
    local rest = value
    for _ = 1, 8 do
      local crc_lsb, data_lsb = crc % 2, rest % 2
      -- shift both registers right by one bit
      crc = (crc - crc_lsb) / 2
      rest = (rest - data_lsb) / 2
      if crc_lsb + data_lsb == 1 then
        -- XOR with 0x8C: bit 7 is clear after the shift, bits 3 and 2 are toggled
        local toggle3 = (crc % 16 < 8) and 8 or -8
        local toggle2 = (crc % 8 < 4) and 4 or -4
        crc = crc + 0x80 + toggle3 + toggle2
      end
    end
  end
  return crc
end

using previous/linked value in equation

Is it possible to solve this kind of equation:
# Target sums: each constraint below requires data[i] + data[i+1] == const[i].
# NOTE(review): `0bcb` is not a valid Python literal (the 0b prefix only accepts
# binary digits); 0xcb was probably intended.
const = [0x50, 0xe8, 0bcb, 0x9f, 0xa1]
data = IntVector('data', len(const))
# NOTE(review): `s` is never created in this snippet; presumably s = Solver().
# The range stops at len(const)-2, so the last element of const is never used.
for i in range(0, len(const)-1):
s.add(data[i] >= 32, data[i] <= 126)
s.add(data[i+1] >= 32, data[i+1] <= 126)
# NOTE(review): the closing parenthesis is missing on the next line.
s.add(data[i] + data[i+1] == const[i]
or am I misusing the Z3 library?
Yes. After fixing the obvious syntax errors, my Z3 reports it as unsatisfiable.
Supposing that 0bcb means something like 0xcb, then the list of constants is [80, 232, 203, 159, 161].
Then you ask for 5 variables, let's call them d0, d1, d2, d3, d4, each of which is between 32 and 126, where d0 + d1 == 80 (so d1 <= 48) and d1 + d2 == 232 (so d1 >= 106). These two requirements clearly contradict each other, and Z3 agrees. (Note that your constraints don't use the last element of const.)
Here is a slightly more pythonic version of the code which includes each of the 5 constants (with the second constant lowered to obtain a solvable system of constraints):
# The Python bindings are published as the lowercase module `z3`; `unsat` has to
# be imported as well because it is compared against below.
from z3 import IntVector, Solver, sat, unsat

# Sums that consecutive pairs of unknowns must produce.
const = [0x50, 0xa9, 0xcb, 0x9f, 0xa1]
s = Solver()
# One more unknown than constants, so that every constant constrains one pair.
data = IntVector('data', len(const) + 1)
# Every unknown is limited to the range 32..126.
for d in data:
    s.add(d >= 32, d <= 126)
# data[i] + data[i+1] == const[i] for every constant.
for d0, d1, c0 in zip(data, data[1:], const):
    s.add(d0 + d1 == c0)
result = s.check()
if result == sat:
    print("Here is a solution: ")
    m = s.model()
    values = [m[d].as_long() for d in data]
    print(values, " sums:", [hex(v0 + v1) for v0, v1 in zip(values, values[1:])])
elif result == unsat:
    print("There is no solution")
else:
    # check() returned neither sat nor unsat
    print("Z3 could not solve the constraints")
Output:
Here is a solution:
[37, 43, 126, 77, 82, 79] sums: ['0x50', '0xa9', '0xcb', '0x9f', '0xa1']

SHA512 pure Lua 5.1 adaptation

I was searching for a pure Lua 5.1 adaptation for SHA512 and yielded no results anywhere I went. I found a similar question where someone tried to convert the SHA256 adaptation into SHA512 (except he was using Lua 5.3):
Adaptation of SHA2 512 gives incorrect results
Basically I couldn't use bitwise operators (not implemented in Lua 5.1) so I had to write my own implementations of them.
This is my code:
-- Modulus used to keep values inside 64 bits, and the intended all-ones mask.
-- NOTE(review): when Lua numbers are doubles (the Lua 5.1 default) 2^64 - 1 is not
-- representable and rounds to 2^64, so MODM equals MOD there.
local MOD = 2^64;
local MODM = MOD-1;
-- Wraps the one-argument function `f` in a table: indexing the table with `k`
-- calls f(k) on first access and stores the result for later lookups.
local function memoize(f)
  local cache = {}
  return setmetatable(cache, {
    __index = function(_, key)
      local value = f(key)
      cache[key] = value
      return value
    end,
  })
end
-- Builds a two-argument operator that combines its arguments digit by digit in
-- base `m`, looking each digit pair up as t[digit_a][digit_b]. Once one argument
-- runs out of digits, the remaining digits of the other are added unchanged.
local function make_bitop_uncached(t, m)
  return function(a, b)
    local result, weight = 0, 1
    while a ~= 0 and b ~= 0 do
      local digit_a, digit_b = a % m, b % m
      result = result + t[digit_a][digit_b] * weight
      a, b = (a - digit_a) / m, (b - digit_b) / m
      weight = weight * m
    end
    return result + (a + b) * weight
  end
end
-- Builds an operator from the 1-bit truth table `t`. It works on digits of
-- t.n bits (1 when t.n is absent); the result for each digit pair is computed
-- bit by bit once and then served from a two-level memoized table.
local function make_bitop(t)
  local bitwise = make_bitop_uncached(t, 2^1)
  local cached = memoize(function(a)
    return memoize(function(b)
      return bitwise(a, b)
    end)
  end)
  local digit_base = 2 ^ (t.n or 1)
  return make_bitop_uncached(cached, digit_base)
end
-- Two-argument XOR working on 4-bit digits
local bxor1 = make_bitop({[0] = {[0] = 0,[1] = 1}, [1] = {[0] = 1, [1] = 0}, n = 4})
-- XOR of any number of arguments, each reduced modulo MOD first.
-- One argument returns it reduced modulo MOD; no argument returns 0.
local function bxor(a, b, c, ...)
  if not b then
    if a then
      return a % MOD
    end
    return 0
  end
  local z = bxor1(a % MOD, b % MOD)
  if c then
    -- fold the remaining arguments in from left to right
    z = bxor(z, c, ...)
  end
  return z
end
-- AND of any number of arguments, each reduced modulo MOD first.
-- Uses the identity a + b = (a XOR b) + 2 * (a AND b).
-- One argument returns it reduced modulo MOD; no argument returns MODM.
local function band(a, b, c, ...)
  if b then
    a = a % MOD
    b = b % MOD
    local z = ((a + b) - bxor1(a, b)) / 2
    -- Fold the remaining arguments in by recursing into this function; the
    -- previous call to the global `bit32_band` failed because it is not defined.
    if c then z = band(z, c, ...) end
    return z
  elseif a then
    return a % MOD
  else
    return MODM
  end
end
-- Complement of `x` modulo MOD, computed as (-1 - x) mod MOD.
local function bnot(x)
  local inverted = -1 - x
  return inverted % MOD
end
-- Forward declaration: rshift1 calls lshift, which is defined further down.
-- Without it the name resolved to an undefined global and a negative
-- displacement raised "attempt to call a nil value".
local lshift

-- Logical right shift of the low 32 bits of `a` by `disp` bits;
-- a negative `disp` shifts left instead.
-- NOTE(review): only the low 32 bits take part (a % 2^32) although the callers
-- work with 64-bit words.
local function rshift1(a, disp)
  if disp < 0 then return lshift(a, -disp) end
  return math.floor(a % 2 ^ 32 / 2 ^ disp)
end

-- Right shift of `x` (reduced modulo MOD) by `disp` bits.
-- NOTE(review): every displacement outside -31..31 yields 0, which also
-- applies to the shift counts above 31 used by the callers.
local function rshift(x, disp)
  if disp > 31 or disp < -31 then return 0 end
  return rshift1(x % MOD, disp)
end

-- Left shift of `a` by `disp` bits, truncated to 32 bits;
-- a negative `disp` shifts right instead.
function lshift(a, disp)
  if disp < 0 then return rshift(a, -disp) end
  return (a * 2 ^ disp) % 2 ^ 32
end
-- UTILITY FUNCTIONS
--
-- Converts every byte of `s` into two lowercase hexadecimal digits.
local function str2hexa (s)
  local function byte_to_hex(ch)
    return string.format("%02x", string.byte(ch))
  end
  -- gsub also returns the number of substitutions; only the string is kept
  local hex = string.gsub(s, ".", byte_to_hex)
  return hex
end
-- Encodes the number `l` as a big-endian string of `n` bytes.
local function num2string(l, n)
  local bytes = {}
  -- fill from the last position backwards: the lowest byte of `l` comes last
  for pos = n, 1, -1 do
    local low = l % 256
    bytes[pos] = string.char(low)
    -- drop the byte that has just been stored
    l = (l - low) / 256
  end
  return table.concat(bytes)
end
-- Reads the eight bytes of `s` starting at index `i` as one big-endian number.
local function s264num (s, i)
  local value = 0
  for offset = 0, 7 do
    value = value * 256 + string.byte(s, i + offset)
  end
  return value
end
--
-- MAIN SECTION
--
-- FIRST STEP: INITIALIZE HASH VALUES
-- (first 64 bits of the fractional parts of the square roots of the
-- first 8 primes 2..19)
local HH = {}
-- Returns a new table holding the eight initial hash values.
-- The argument is ignored: `H` is rebound to the new table, so the table that
-- was passed in is left untouched.
local function initH512(H)
  H = {
    0x6a09e667f3bcc908, 0xbb67ae8584caa73b, 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1,
    0x510e527fade682d1, 0x9b05688c2b3e6c1f, 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179,
  }
  return H
end
-- SECOND STEP: INITIALIZE ROUND CONSTANTS
-- (first 64 bits of the fractional parts of the cube roots of the first 80 primes 2..409)
-- One constant per round, read as k[i] in digestblock.
local k = {
0x428a2f98d728ae22, 0x7137449123ef65cd, 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc, 0x3956c25bf348b538,
0x59f111f1b605d019, 0x923f82a4af194f9b, 0xab1c5ed5da6d8118, 0xd807aa98a3030242, 0x12835b0145706fbe,
0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2, 0x72be5d74f27b896f, 0x80deb1fe3b1696b1, 0x9bdc06a725c71235,
0xc19bf174cf692694, 0xe49b69c19ef14ad2, 0xefbe4786384f25e3, 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65,
0x2de92c6f592b0275, 0x4a7484aa6ea6e483, 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5, 0x983e5152ee66dfab,
0xa831c66d2db43210, 0xb00327c898fb213f, 0xbf597fc7beef0ee4, 0xc6e00bf33da88fc2, 0xd5a79147930aa725,
0x06ca6351e003826f, 0x142929670a0e6e70, 0x27b70a8546d22ffc, 0x2e1b21385c26c926, 0x4d2c6dfc5ac42aed,
0x53380d139d95b3df, 0x650a73548baf63de, 0x766a0abb3c77b2a8, 0x81c2c92e47edaee6, 0x92722c851482353b,
0xa2bfe8a14cf10364, 0xa81a664bbc423001, 0xc24b8b70d0f89791, 0xc76c51a30654be30, 0xd192e819d6ef5218,
0xd69906245565a910, 0xf40e35855771202a, 0x106aa07032bbd1b8, 0x19a4c116b8d2d0c8, 0x1e376c085141ab53,
0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8, 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb, 0x5b9cca4f7763e373,
0x682e6ff3d6b2b8a3, 0x748f82ee5defb2fc, 0x78a5636f43172f60, 0x84c87814a1f0ab72, 0x8cc702081a6439ec,
0x90befffa23631e28, 0xa4506cebde82bde9, 0xbef9a3f7b2c67915, 0xc67178f2e372532b, 0xca273eceea26619c,
0xd186b8c721c0c207, 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178, 0x06f067aa72176fba, 0x0a637dc5a2c898a6,
0x113f9804bef90dae, 0x1b710b35131c471b, 0x28db77f523047d84, 0x32caab7b40c72493, 0x3c9ebe0a15c9bebc,
0x431d67c49c100d4c, 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a, 0x5fcb6fab3ad6faec, 0x6c44198c4a475817
}
-- THIRD STEP: PRE-PROCESSING (padding)
-- Pads the `len`-byte message `toProcess` to a multiple of 128 bytes:
-- one byte "\128" (a single '1' bit), then zero bytes, then the message length
-- in bits as an 8-byte big-endian number.
local function preprocess(toProcess, len)
  -- Zero bytes needed so that len + 1 + zeros + 8 is a multiple of 128; the
  -- extra 8 stand in for the upper half of the 16-byte length field.
  local zero_count = - (len + 17) % 128 + 8
  local bit_length = num2string(8 * len, 8)
  local padded = toProcess .. "\128" .. string.rep("\0", zero_count) .. bit_length
  assert(#padded % 128 == 0)
  return padded
end
-- Meant to rotate `rot` right by `n` bits.
-- NOTE(review): `or` is Lua's logical operator, not a bitwise OR. Every number is
-- truthy, so the expression always evaluates to rshift(rot, n) and the second
-- rshift is never called. A rotation would also need the bits that fall off on
-- the right to come back in on the left, i.e. a left shift by 64 - n.
local function rrotate(rot, n)
return rshift(rot, n) or (rshift(rot, 64 - n))
end
-- Processes the 128-byte block of `msg` that starts at byte index `i` and adds
-- the resulting working variables to the eight entries of `H` (modified in place).
-- NOTE(review): none of the sums in this function is reduced modulo 2^64
-- (MOD is never applied here).
local function digestblock(msg, i, H)
local w = {}
-- w[1..16]: the block read as sixteen big-endian 8-byte numbers
for j = 1, 16 do w[j] = s264num(msg, i + (j - 1) * 8) end
-- w[17..80]: message schedule derived from the earlier entries
for j = 17, 80 do
local v = w[j - 15]
local s0 = bxor(rrotate(v, 1), rrotate(v, 8), rshift(v, 7))
v = w[j - 2]
w[j] = w[j - 16] + s0 + w[j - 7] + bxor(rrotate(v, 19), rrotate(v, 61),
rshift(v, 6))
end
-- Working variables start from the current hash state
local a, b, c, d, e, f, g, h = H[1], H[2], H[3], H[4], H[5], H[6], H[7], H[8]
-- 80 rounds; this loop variable `i` shadows the parameter `i`
for i = 1, 80 do
-- NOTE(review): the next line assigns every variable to itself and has no effect
a, b, c, d, e, f, g, h = a , b , c , d , e , f , g , h
local s0 = bxor(rrotate(a, 28), rrotate(a, 34), rrotate(a, 39))
local maj = bxor(band(a, b), band(a, c), band(b, c))
local t2 = s0 + maj
local s1 = bxor(rrotate(e, 14), rrotate(e, 18), rrotate(e, 41))
local ch = bxor (band(e, f), band(bnot(e), g))
local t1 = h + s1 + ch + k[i] + w[i]
-- Shift the variables down by one; the new e and a take in t1 and t1 + t2
h, g, f, e, d, c, b, a = g, f, e, d + t1, c, b, a, t1 + t2
end
-- Add this block's result to the running hash state
H[1] = (H[1] + a)
H[2] = (H[2] + b)
H[3] = (H[3] + c)
H[4] = (H[4] + d)
H[5] = (H[5] + e)
H[6] = (H[6] + f)
H[7] = (H[7] + g)
H[8] = (H[8] + h)
end
-- Produces the final hash value: H[1]..H[8] are each encoded as 8 big-endian
-- bytes, concatenated and returned as a hexadecimal string.
local function finalresult512 (H)
  local words = {}
  for idx = 1, 8 do
    words[idx] = num2string(H[idx], 8)
  end
  return str2hexa(table.concat(words))
end
-- Returns the hash512 for the given string: pads the message, runs every
-- 128-byte (1024-bit) block through digestblock and formats the final state.
local function hash512 (msg)
  local padded = preprocess(msg, #msg)
  local state = initH512(HH)
  local offset = 1
  while offset <= #padded do
    digestblock(padded, offset, state)
    offset = offset + 128
  end
  return finalresult512(state)
end
print( hash512("a") )
At the end, when "a" is hashed, it turns into this:
8c14f3e36400000074d6c495c0000000fd2e4ad8b40000009a78880fb00000002c13f4fdc0000000bf50f67658000000cdf76c796c000000df8163cae8000000
Instead of the actual hash (which is this):
1F40FC92DA241694750979EE6CF582F2D5D7D28E18335DE05ABC54D0560E0F5302860C652BF08D560252AA5E74210546F369FBBBCE8C12CFC7957B2652FE9A75
So my question is: why is it yielding such different results? Is it a problem with the bitwise operator functions? I am stumped.
Here is a working implementation of SHA512 for Lua 5.1
File sha2for51.lua
-- This module contains functions to calculate SHA2 digest.
-- Supported hashes: SHA-224, SHA-256, SHA-384, SHA-512, SHA-512/224, SHA-512/256
-- This is a pure-Lua module, compatible with Lua 5.1
-- It works on Lua 5.1/5.2/5.3/5.4/LuaJIT, but it doesn't use benefits of Lua versions 5.2+
-- Input data may be provided either as a whole string or as a sequence of substrings (chunk-by-chunk).
-- Result (SHA2 digest) is a string of lowercase hex digits.
--
-- Simplest usage example:
-- local your_hash = require("sha2for51").sha512("your string")
-- See file "sha2for51_test.lua" for more examples.
local unpack, table_concat, byte, char, string_rep, sub, string_format, floor, ceil, min, max =
table.unpack or unpack, table.concat, string.byte, string.char, string.rep, string.sub, string.format, math.floor, math.ceil, math.min, math.max
--------------------------------------------------------------------------------
-- BASIC BITWISE FUNCTIONS
--------------------------------------------------------------------------------
-- 32-bit bitwise functions
local AND, OR, XOR, SHL, SHR, ROL, ROR, HEX
-- Only low 32 bits of function arguments matter, high bits are ignored
-- The result of all functions (except HEX) is an integer (pair of integers) inside range 0..(2^32-1)
-- Left shift of `x` by `n` bits, truncated to 32 bits.
function SHL(x, n)
  local shifted = x * 2^n
  return shifted % 4294967296
end
-- Logical right shift of the low 32 bits of `x` by `n` bits.
function SHR(x, n)
  local quotient = x % 4294967296 / 2^n
  -- subtracting the fractional part drops the bits shifted out
  return quotient - quotient % 1
end
-- Rotates the low 32 bits of `x` left by `n` bits.
function ROL(x, n)
  local wide = x % 4294967296 * 2^n
  local kept = wide % 4294967296
  -- the bits pushed beyond bit 31 come back in at the bottom
  return kept + (wide - kept) / 4294967296
end
-- Rotates the low 32 bits of `x` right by `n` bits.
function ROR(x, n)
  local quotient = x % 4294967296 / 2^n
  local dropped = quotient % 1
  -- the fractional part holds the bits shifted out; scaled by 2^32 they
  -- become the top bits of the result
  return dropped * 4294967296 + (quotient - dropped)
end
-- Look-up table with 256*256 entries: entry [p + q * 256] holds the bitwise
-- AND of the bytes p and q.
local AND_of_two_bytes = {}
for idx = 0, 65535 do
  local lo_byte = idx % 256
  local hi_byte = (idx - lo_byte) / 256
  local conj, bit_weight = 0, 1
  -- walk both bytes from the lowest bit upwards until one of them is used up
  while lo_byte ~= 0 and hi_byte ~= 0 do
    local lo_bit, hi_bit = lo_byte % 2, hi_byte % 2
    conj = conj + lo_bit * hi_bit * bit_weight
    lo_byte, hi_byte = (lo_byte - lo_bit) / 2, (hi_byte - hi_bit) / 2
    bit_weight = bit_weight * 2
  end
  AND_of_two_bytes[idx] = conj
end
-- Bitwise AND, OR or XOR of the low 32 bits of `x` and `y`.
-- The AND is assembled byte by byte from the AND_of_two_bytes table; OR and XOR
-- are then derived from it with x + y - AND and x + y - 2 * AND.
local function and_or_xor(x, y, operation)
-- operation: nil = AND, 1 = OR, 2 = XOR
local x0 = x % 4294967296
local y0 = y % 4294967296
-- byte 0 of both operands
local rx = x0 % 256
local ry = y0 % 256
local res = AND_of_two_bytes[rx + ry * 256]
-- x keeps its scale with byte 0 cleared, y is shifted right by one byte
x = x0 - rx
y = (y0 - ry) / 256
-- byte 1: rx is byte 1 of x still multiplied by 256, ry is byte 1 of y
rx = x % 65536
ry = y % 256
res = res + AND_of_two_bytes[rx + ry] * 256
x = (x - rx) / 256
y = (y - ry) / 256
-- byte 2: byte 2 of x (times 256) plus byte 2 of y form the table index
rx = x % 65536 + y % 256
res = res + AND_of_two_bytes[rx] * 65536
-- byte 3: what is left of x and y after removing byte 2
res = res + AND_of_two_bytes[(x + y - rx) / 256] * 16777216
if operation then
-- OR = x + y - AND, XOR = x + y - 2 * AND
res = x0 + y0 - operation * res
end
return res
end
-- Bitwise AND of the low 32 bits of x and y.
function AND(x, y)
  return and_or_xor(x, y, nil)
end

-- Bitwise OR of the low 32 bits of x and y.
function OR(x, y)
  local mode_or = 1
  return and_or_xor(x, y, mode_or)
end

-- Bitwise XOR of the low 32 bits of two or three arguments.
function XOR(x, y, z) -- 2 or 3 arguments
  if not z then
    return and_or_xor(x, y, 2)
  end
  -- three arguments: combine y and z first, then x
  return and_or_xor(x, and_or_xor(y, z, 2), 2)
end

-- Low 32 bits of x as eight lowercase hexadecimal digits.
function HEX(x)
  local low32 = x % 4294967296
  return string_format("%08x", low32)
end
-- Arrays of SHA2 "magic numbers"
-- sha2_K_lo / sha2_K_hi: low and high 32-bit halves of the round constants
-- sha2_H_lo / sha2_H_hi: low and high 32-bit halves of the initial hash values
-- (all four are filled in by the "CALCULATING THE MAGIC NUMBERS" block below)
local sha2_K_lo, sha2_K_hi, sha2_H_lo, sha2_H_hi = {}, {}, {}, {}
-- Initial values per digest width for the 32-bit hashes; width 256 shares the sha2_H_hi table
local sha2_H_ext256 = {[224] = {}, [256] = sha2_H_hi}
-- Initial values per digest width for the 64-bit hashes; width 512 shares sha2_H_lo / sha2_H_hi
local sha2_H_ext512_lo, sha2_H_ext512_hi = {[384] = {}, [512] = sha2_H_lo}, {[384] = {}, [512] = sha2_H_hi}
local common_W = {} -- a temporary table shared between all calculations
-- Runs `size` bytes of `str`, beginning right after byte offset `offs`, through
-- the compression function with 32-bit words and 64 rounds per 64-byte block.
--   H : array of eight 32-bit state words, updated in place
--   K : round constants, K[1..64] are read
--   W : scratch table that receives the 64-entry message schedule
local function sha256_feed_64(H, K, str, W, offs, size)
-- offs >= 0, size >= 0, size is multiple of 64
for pos = offs, size + offs - 1, 64 do
-- W[1..16]: the block as sixteen big-endian 32-bit words; `pos` is advanced
-- here to walk through the bytes of the current block
for j = 1, 16 do
pos = pos + 4
local a, b, c, d = byte(str, pos - 3, pos)
W[j] = ((a * 256 + b) * 256 + c) * 256 + d
end
-- W[17..64]: message schedule. The sums are not reduced here; the bitwise
-- helpers only look at the low 32 bits of their arguments.
for j = 17, 64 do
local a, b = W[j-15], W[j-2]
W[j] = XOR(ROR(a, 7), ROL(a, 14), SHR(a, 3)) + XOR(ROL(b, 15), ROL(b, 13), SHR(b, 10)) + W[j-7] + W[j-16]
end
-- Working variables start from the current state (z starts out as nil)
local a, b, c, d, e, f, g, h, z = H[1], H[2], H[3], H[4], H[5], H[6], H[7], H[8]
for j = 1, 64 do
z = XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + AND(e, f) + AND(-1-e, g) + h + K[j] + W[j]
h = g
g = f
f = e
e = z + d
d = c
c = b
b = a
-- at this point b, c, d hold the previous a, b, c
a = z + AND(d, c) + AND(a, XOR(d, c)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10))
end
-- Add the working variables to the state, reduced to 32 bits
H[1], H[2], H[3], H[4] = (a + H[1]) % 4294967296, (b + H[2]) % 4294967296, (c + H[3]) % 4294967296, (d + H[4]) % 4294967296
H[5], H[6], H[7], H[8] = (e + H[5]) % 4294967296, (f + H[6]) % 4294967296, (g + H[7]) % 4294967296, (h + H[8]) % 4294967296
end
end
-- Runs `size` bytes of `str`, beginning right after byte offset `offs`, through
-- the compression function with 64-bit words and 80 rounds per 128-byte block.
-- Every 64-bit quantity is kept as a pair of 32-bit halves (_hi, _lo); carries
-- from the low half are moved into the high half by hand.
--   H_lo, H_hi : low / high halves of the eight state words, updated in place
--   K_lo, K_hi : low / high halves of the round constants, entries 1..80 are read
--   W          : scratch table that receives the message schedule
local function sha512_feed_128(H_lo, H_hi, K_lo, K_hi, str, W, offs, size)
-- offs >= 0, size >= 0, size is multiple of 128
-- W1_hi, W1_lo, W2_hi, W2_lo, ... Wk_hi = W[2*k-1], Wk_lo = W[2*k]
for pos = offs, size + offs - 1, 128 do
-- W[1..32]: the block as thirty-two big-endian 32-bit words
for j = 1, 32 do
pos = pos + 4
local a, b, c, d = byte(str, pos - 3, pos)
W[j] = ((a * 256 + b) * 256 + c) * 256 + d
end
local tmp1, tmp2
-- Schedule words 17..80; jj is the index of the low half of the word being built
for jj = 17 * 2, 80 * 2, 2 do
local a_lo, a_hi, b_lo, b_hi = W[jj-30], W[jj-31], W[jj-4], W[jj-5]
-- low half first; whatever exceeds 32 bits is the carry into the high half
tmp1 = XOR(SHR(a_lo, 1) + SHL(a_hi, 31), SHR(a_lo, 8) + SHL(a_hi, 24), SHR(a_lo, 7) + SHL(a_hi, 25)) + XOR(SHR(b_lo, 19) + SHL(b_hi, 13), SHL(b_lo, 3) + SHR(b_hi, 29), SHR(b_lo, 6) + SHL(b_hi, 26)) + W[jj-14] + W[jj-32]
tmp2 = tmp1 % 4294967296
W[jj-1] = XOR(SHR(a_hi, 1) + SHL(a_lo, 31), SHR(a_hi, 8) + SHL(a_lo, 24), SHR(a_hi, 7)) + XOR(SHR(b_hi, 19) + SHL(b_lo, 13), SHL(b_hi, 3) + SHR(b_lo, 29), SHR(b_hi, 6)) + W[jj-15] + W[jj-33] + (tmp1 - tmp2) / 4294967296
W[jj] = tmp2
end
-- Working variables start from the current state (z_lo and z_hi start out as nil)
local a_lo, b_lo, c_lo, d_lo, e_lo, f_lo, g_lo, h_lo, z_lo = H_lo[1], H_lo[2], H_lo[3], H_lo[4], H_lo[5], H_lo[6], H_lo[7], H_lo[8]
local a_hi, b_hi, c_hi, d_hi, e_hi, f_hi, g_hi, h_hi, z_hi = H_hi[1], H_hi[2], H_hi[3], H_hi[4], H_hi[5], H_hi[6], H_hi[7], H_hi[8]
for j = 1, 80 do
local jj = 2 * j
-- z: first temporary of the round, low half with carry into the high half
tmp1 = XOR(SHR(e_lo, 14) + SHL(e_hi, 18), SHR(e_lo, 18) + SHL(e_hi, 14), SHL(e_lo, 23) + SHR(e_hi, 9)) + AND(e_lo, f_lo) + AND(-1-e_lo, g_lo) + h_lo + K_lo[j] + W[jj]
z_lo = tmp1 % 4294967296
z_hi = XOR(SHR(e_hi, 14) + SHL(e_lo, 18), SHR(e_hi, 18) + SHL(e_lo, 14), SHL(e_hi, 23) + SHR(e_lo, 9)) + AND(e_hi, f_hi) + AND(-1-e_hi, g_hi) + h_hi + K_hi[j] + W[jj-1] + (tmp1 - z_lo) / 4294967296
h_lo = g_lo
h_hi = g_hi
g_lo = f_lo
g_hi = f_hi
f_lo = e_lo
f_hi = e_hi
-- e = z + d, again with a manual carry
tmp1 = z_lo + d_lo
e_lo = tmp1 % 4294967296
e_hi = z_hi + d_hi + (tmp1 - e_lo) / 4294967296
d_lo = c_lo
d_hi = c_hi
c_lo = b_lo
c_hi = b_hi
b_lo = a_lo
b_hi = a_hi
-- at this point b, c, d hold the previous a, b, c
tmp1 = z_lo + AND(d_lo, c_lo) + AND(b_lo, XOR(d_lo, c_lo)) + XOR(SHR(b_lo, 28) + SHL(b_hi, 4), SHL(b_lo, 30) + SHR(b_hi, 2), SHL(b_lo, 25) + SHR(b_hi, 7))
a_lo = tmp1 % 4294967296
a_hi = z_hi + (AND(d_hi, c_hi) + AND(b_hi, XOR(d_hi, c_hi))) + XOR(SHR(b_hi, 28) + SHL(b_lo, 4), SHL(b_hi, 30) + SHR(b_lo, 2), SHL(b_hi, 25) + SHR(b_lo, 7)) + (tmp1 - a_lo) / 4294967296
end
-- Add the working variables to the state: low halves first, the carry goes
-- into the high half, which is then reduced to 32 bits
tmp1 = H_lo[1] + a_lo
tmp2 = tmp1 % 4294967296
H_lo[1], H_hi[1] = tmp2, (H_hi[1] + a_hi + (tmp1 - tmp2) / 4294967296) % 4294967296
tmp1 = H_lo[2] + b_lo
tmp2 = tmp1 % 4294967296
H_lo[2], H_hi[2] = tmp2, (H_hi[2] + b_hi + (tmp1 - tmp2) / 4294967296) % 4294967296
tmp1 = H_lo[3] + c_lo
tmp2 = tmp1 % 4294967296
H_lo[3], H_hi[3] = tmp2, (H_hi[3] + c_hi + (tmp1 - tmp2) / 4294967296) % 4294967296
tmp1 = H_lo[4] + d_lo
tmp2 = tmp1 % 4294967296
H_lo[4], H_hi[4] = tmp2, (H_hi[4] + d_hi + (tmp1 - tmp2) / 4294967296) % 4294967296
tmp1 = H_lo[5] + e_lo
tmp2 = tmp1 % 4294967296
H_lo[5], H_hi[5] = tmp2, (H_hi[5] + e_hi + (tmp1 - tmp2) / 4294967296) % 4294967296
tmp1 = H_lo[6] + f_lo
tmp2 = tmp1 % 4294967296
H_lo[6], H_hi[6] = tmp2, (H_hi[6] + f_hi + (tmp1 - tmp2) / 4294967296) % 4294967296
tmp1 = H_lo[7] + g_lo
tmp2 = tmp1 % 4294967296
H_lo[7], H_hi[7] = tmp2, (H_hi[7] + g_hi + (tmp1 - tmp2) / 4294967296) % 4294967296
tmp1 = H_lo[8] + h_lo
tmp2 = tmp1 % 4294967296
H_lo[8], H_hi[8] = tmp2, (H_hi[8] + h_hi + (tmp1 - tmp2) / 4294967296) % 4294967296
end
end
--------------------------------------------------------------------------------
-- CALCULATING THE MAGIC NUMBERS (roots of primes)
--------------------------------------------------------------------------------
-- Fills sha2_K_hi/lo with values derived from cube roots of primes and the
-- initial-value tables with values derived from square roots of primes.
-- The outer loop stops once idx exceeds 79, i.e. after 80 primes.
do
local function mul(src1, src2, factor, result_length)
-- Long arithmetic multiplication: src1 * src2 * factor
-- src1, src2 - long integers (arrays of digits in base 2^24)
-- factor - short integer
-- Only the lowest `result_length` digits of the product are produced.
local result = {}
local carry = 0
local value = 0.0
local weight = 1.0
for j = 1, result_length do
-- digit j of the product: sum over all digit pairs whose positions add up to j
local prod = 0
for k = max(1, j + 1 - #src2), min(j, #src1) do
prod = prod + src1[k] * src2[j + 1 - k]
end
carry = carry + prod * factor
local digit = carry % 16777216
result[j] = digit
carry = floor(carry / 16777216)
value = value + digit * weight
weight = weight * 2^24
end
return
result, -- long integer
value -- and its floating point approximation
end
-- `step` is indexed with the current value modulo 6: p walks through 2, 3, 5, 7
-- and then the numbers that leave remainder 1 or 5 when divided by 6
local idx, step, p, one = 0, {4, 1, 2, -2, 2}, 4, {1}
-- Square-root results go to sha2_H_hi/lo first (see the switch at idx == 8)
local sqrt_hi, sqrt_lo, idx_disp = sha2_H_hi, sha2_H_lo, 0
repeat
p = p + step[p % 6]
-- Trial division by d = 5, 7, 11, 13, ...; p is taken as prime once d * d > p
local d = 1
repeat
d = d + step[d % 6]
if d * d > p then
idx = idx + 1
-- Cube root of the prime: R holds floor(root * 2^40) as two base-2^24 digits
local root = p^(1/3)
local R = mul({floor(root * 2^40)}, one, 1, 2)
-- delta comes from the low four digits of -(R^3); presumably it is the remaining
-- error term used to extend the root beyond 40 fractional bits - confirm
local _, delta = mul(R, mul(R, R, 1, 4), -1, 4)
-- hi: bits 8..39 of R, i.e. the first 32 fractional bits of the root
local hi = R[2] % 65536 * 65536 + floor(R[1] / 256)
-- lo: the lowest 8 bits of R followed by a correction computed from delta
local lo = R[1] % 256 * 16777216 + floor(delta * (2^-56 / 3) * root / p)
sha2_K_hi[idx], sha2_K_lo[idx] = hi, lo
if idx < 17 then
-- Same scheme with the square root, only for the first 16 primes
root = p^(1/2)
R = mul({floor(root * 2^40)}, one, 1, 2)
_, delta = mul(R, R, -1, 2)
hi = R[2] % 65536 * 65536 + floor(R[1] / 256)
lo = R[1] % 256 * 16777216 + floor(delta * 2^-17 / root)
-- Entries written for primes 1..8 are overwritten by primes 9..16 once idx_disp is -8
sha2_H_ext256[224][idx + idx_disp] = lo
sqrt_hi[idx + idx_disp], sqrt_lo[idx + idx_disp] = hi, lo
if idx == 8 then
-- Primes 9..16 are stored in entries 1..8 of the tables for width 384
sqrt_hi, sqrt_lo, idx_disp = sha2_H_ext512_hi[384], sha2_H_ext512_lo[384], -8
end
end
break
end
until p % d == 0
until idx > 79
end
-- Calculating IV for SHA512/224 and SHA512/256:
-- both halves of every initial value are XORed with 0xa5a5a5a5, then one
-- ready-padded 128-byte block containing the text "SHA-512/<width>" is processed.
for _, width in ipairs({224, 256}) do
  local iv_lo, iv_hi = {}, {}
  for j = 1, 8 do
    iv_lo[j] = XOR(sha2_H_lo[j], 0xa5a5a5a5)
    iv_hi[j] = XOR(sha2_H_hi[j], 0xa5a5a5a5)
  end
  -- 11 characters of text, "\128", 115 zero bytes and the length byte "\88" (88 bits)
  local block = "SHA-512/"..tonumber(width).."\128"..string_rep("\0", 115).."\88"
  sha512_feed_128(iv_lo, iv_hi, sha2_K_lo, sha2_K_hi, block, common_W, 0, 128)
  sha2_H_ext512_lo[width] = iv_lo
  sha2_H_ext512_hi[width] = iv_hi
end
--------------------------------------------------------------------------------
-- FINAL FUNCTIONS
--------------------------------------------------------------------------------
-- Computes a digest of `width` bits with the 32-bit compression function.
--   width : key into sha2_H_ext256 (224 or 256); also sets the output length
--   text  : whole message as one string, or nil for chunk-by-chunk use
-- Returns the digest as a hex string when `text` is given; otherwise returns a
-- function that takes one chunk per call and, called without an argument,
-- returns the digest.
local function sha256ext(width, text)
-- Create an instance (private objects for current calculation)
-- H: copy of the initial values, length: bytes seen so far,
-- tail: bytes not yet processed (set to nil once the digest has been produced)
local H, length, tail = {unpack(sha2_H_ext256[width])}, 0, ""
local function partial(text_part)
if text_part then
if tail then
length = length + #text_part
local offs = 0
-- Complete a pending partial block with the beginning of the new chunk
if tail ~= "" and #tail + #text_part >= 64 then
offs = 64 - #tail
sha256_feed_64(H, sha2_K_hi, tail..sub(text_part, 1, offs), common_W, 0, 64)
tail = ""
end
-- Process all whole 64-byte blocks of the chunk, keep the remainder in tail
local size = #text_part - offs
local size_tail = size % 64
sha256_feed_64(H, sha2_K_hi, text_part, common_W, offs, size - size_tail)
tail = tail..sub(text_part, #text_part + 1 - size_tail)
return partial
else
error("Adding more chunks is not allowed after asking for final result", 2)
end
else
if tail then
-- Padding: remaining bytes, "\128", zero bytes; entries 4..10 added below hold the bit length
local final_blocks = {tail, "\128", string_rep("\0", (-9 - length) % 64 + 1)}
tail = nil
-- Assuming user data length is shorter than 2^53 bytes
-- Anyway, it looks very unrealistic that one would spend enough time to process a 2^53 bytes of data by using this Lua script :-)
-- 2^53 bytes = 2^56 bits, so "bit-counter" fits in 7 bytes
length = length * (8 / 256^7) -- convert "byte-counter" to "bit-counter" and move floating point to the left
-- Peel off seven bytes of the bit count, most significant first
for j = 4, 10 do
length = length % 1 * 256
final_blocks[j] = char(floor(length))
end
final_blocks = table_concat(final_blocks)
sha256_feed_64(H, sha2_K_hi, final_blocks, common_W, 0, #final_blocks)
-- Only the first width / 32 state words are part of the digest
local max_reg = width / 32
for j = 1, max_reg do
H[j] = HEX(H[j])
end
H = table_concat(H, "", 1, max_reg)
end
-- H is now the hex string; later calls without an argument return it again
return H
end
end
if text then
-- Actually perform calculations and return the SHA256 digest of a message
return partial(text)()
else
-- Return function for partial chunk loading
-- User should feed every chunks of input data as single argument to this function and receive SHA256 digest by invoking this function without an argument
return partial
end
end
-- Digest engine shared by the 64-bit-word variants (each word kept as a hi/lo 32-bit pair).
--   width : digest width in bits; selects the initial state sha2_H_ext512_lo/hi[width] and
--           the length of the returned hex string (width / 4 characters)
--   text  : optional message; when given, the hex digest of "text" is returned at once,
--           otherwise a chunk-feeding closure is returned
-- The closure "partial" takes a string chunk (and returns itself, so calls can be chained)
-- or no argument (and returns the final hex digest; this may be repeated).
local function sha512ext(width, text)
   -- Private state of this calculation (tail becomes nil once the digest has been finalized)
   local length, tail, H_lo, H_hi = 0, "", {unpack(sha2_H_ext512_lo[width])}, {unpack(sha2_H_ext512_hi[width])}

   local function partial(text_part)
      if text_part then
         if not tail then
            error("Adding more chunks is not allowed after asking for final result", 2)
         end
         length = length + #text_part
         local offs = 0
         if tail ~= "" and #tail + #text_part >= 128 then
            -- Complete the pending partial block with the head of the new chunk
            offs = 128 - #tail
            sha512_feed_128(H_lo, H_hi, sha2_K_lo, sha2_K_hi, tail..sub(text_part, 1, offs), common_W, 0, 128)
            tail = ""
         end
         local remaining = #text_part - offs
         local leftover = remaining % 128
         -- Process all whole 128-byte blocks directly from the chunk, keep the rest for later
         sha512_feed_128(H_lo, H_hi, sha2_K_lo, sha2_K_hi, text_part, common_W, offs, remaining - leftover)
         tail = tail..sub(text_part, #text_part + 1 - leftover)
         return partial
      end

      if tail then
         -- Padding: remainder, 0x80, zero bytes, then the message bit length.
         -- The zero run already supplies the 9 leading zero bytes of the 16-byte length
         -- field; the 7 bytes written below assume the message is shorter than 2^53 bytes.
         local pieces = {tail, "\128", string_rep("\0", (-17-length) % 128 + 9)}
         tail = nil
         -- Turn the byte count into a bit count scaled into [0, 1), then peel off one
         -- base-256 digit per iteration, most significant first
         local scaled = length * (8 / 256^7)
         for slot = 4, 10 do
            scaled = scaled % 1 * 256
            pieces[slot] = char(floor(scaled))
         end
         local padded = table_concat(pieces)
         sha512_feed_128(H_lo, H_hi, sha2_K_lo, sha2_K_hi, padded, common_W, 0, #padded)
         local max_reg = ceil(width / 64)
         local words = {}
         for j = 1, max_reg do
            words[j] = HEX(H_hi[j])..HEX(H_lo[j])
         end
         H_hi = nil
         -- For widths that are not a multiple of 64 the last word is cut to width / 4 hex digits
         H_lo = table_concat(words, "", 1, max_reg):sub(1, width / 4)
      end
      return H_lo
   end

   if text then
      -- One-shot mode: feed the whole message, then ask for the digest
      return partial(text)()
   end
   -- Streaming mode: hand the feeding closure to the caller
   return partial
end
-- Public module table: one entry per supported hash function.
-- Every entry accepts an optional message; see sha256ext / sha512ext for the calling modes.
local sha2for51 = {}

-- Variants built on 32-bit words
function sha2for51.sha224(text) return sha256ext(224, text) end   -- SHA-224
function sha2for51.sha256(text) return sha256ext(256, text) end   -- SHA-256

-- Variants built on 64-bit words
function sha2for51.sha384(text) return sha512ext(384, text) end   -- SHA-384
function sha2for51.sha512(text) return sha512ext(512, text) end   -- SHA-512
function sha2for51.sha512_224(text) return sha512ext(224, text) end   -- SHA-512/224
function sha2for51.sha512_256(text) return sha512ext(256, text) end   -- SHA-512/256

return sha2for51
File sha2for51_test.lua
--------------------------------------------------------------------------------
-- TESTS
--------------------------------------------------------------------------------
local sha2 = require"sha2for51"
-- Checks sha2.sha256 in one-shot and chunk-by-chunk mode against known digests.
local function test_sha256()
   local sha256 = sha2.sha256
   -- digests that are expected more than once below
   local FOX_DOG = "d7a8fbb307d7809469ca9abcb0082e4f8d5651e46d3cdb762d02d0bf37c9e592"
   local DIGITS = "8d969eef6ecad3c29a3a629280e686cf0c3f5d5a86aff3ca12020c923adc6c92"
   local EMPTY = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"

   -- some test strings: {message, expected digest}
   local vectors = {
      {"The quick brown fox jumps over the lazy dog", FOX_DOG},
      {"The quick brown fox jumps over the lazy cog", "e4c4d8f3bf76b692de791a173e05321150f7a345b46484fe427f6acc7ecc81be"},
      {"abc", "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"},
      {"123456", DIGITS},
      {"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", "248d6a61d20638b8e5c026930c3e6039a33ce45964ff2167f6ecedd419db06c1"},
      {"abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", "cf5b16a778af8380036ce59e7b0492370b249b11e8f07a51afac45037afee9d1"},
   }
   for _, vector in ipairs(vectors) do
      assert(sha256(vector[1]) == vector[2])
   end

   -- chunk-by-chunk loading: sha256("string") == sha256()("st")("ri")("ng")()
   local feed = sha256()   -- private closure for calculating the digest of a single string
   feed("The quick brown fox")
   feed(" jumps ")
   feed("")   -- a chunk may be an empty string
   feed("over the lazy dog")
   assert(feed() == FOX_DOG)   -- asking for the final result (invocation without an argument)
   assert(feed() == FOX_DOG)   -- the same result may be requested several times
   -- no more chunks are allowed after the result has been received
   assert(not pcall(feed, "more text"))
   -- a one-liner is possible because feeding a chunk returns the feeding function itself
   assert(sha256()("The quick brown fox")(" jumps ")("")("over the lazy dog")() == FOX_DOG)

   -- empty string
   assert(sha256("") == EMPTY)
   assert(sha256()() == EMPTY)

   -- computations of different strings don't interfere with each other
   local feed_digits = sha256()
   feed_digits("123")
   local feed_fox = sha256()
   feed_fox("The quick brown fox jumps ")
   feed_digits("45")
   feed_fox("over the lazy dog")
   feed_digits("6")
   assert(feed_digits() == DIGITS)
   assert(feed_fox() == FOX_DOG)

   -- "00...0\n", from 50 to 70 zeroes (lengths around the 64-byte block boundary)
   local zero_line_digests = {
      [50] = "9660acb8046abf46cf27280e61abd174ebac98ad6855e093772b78df85523129",
      [51] = "31e1c552b357ace9bcb924691799a3c0d3aa10d8b428d9de28a278e3c79ecb7b",
      [52] = "0be5c4bcb6f47e30c13515594dbef4faa3a6485af67c177179fee8b33cd4f2a0",
      [53] = "d368c7f6038c1743bdbfe6a9c3a72d4e6916aa219ed8d559766c9e8f9845f3b8",
      [54] = "7080a4aa6ff030ae152fe610a62ee29464f92afeb176474551a69d35aab154a0",
      [55] = "149c1cda81fa9359c0c2a5e405ca972986f1d53e05f6282871dd1581046b3f44",
      [56] = "eb2d4d41948ce546c8adff07ee97342070c5b89789f616a33efe52c7d3ec73d4",
      [57] = "c831db596ccbbf248023461b1c05d3ae084bcc79bcb2626c5ec179fb34371f2a",
      [58] = "1345b8a930737b1069bbf9b891ce095850f6cdba6e25874ea526a2ccb611fe46",
      [59] = "380ad21e466885fae080ceeada75ac04944687e626e161c0b24e91af3eec2def",
      [60] = "b9ab06fa30ef8531c5eee11651aa86f8279a245e0a3c29bf6228c59475cc610a",
      [61] = "bcc187de6605d9e11a0cc6edf02b67fb651fe1779ec59438788093d8e376c07c",
      [62] = "ae0b3681157b83b34de8591d2453915e40c3105ae79434e241d82d4035218e01",
      [63] = "68a27b4735f6806fb5983c1805a23797aa93ea06e0ebcb6daada2ea1ab5a05af",
      [64] = "827d096d92f3deeaa0e8070d79f45beb176768e57a958a1cd325f5f4b754b048",
      [65] = "6c7bd8ec0fe9b4e05a2d27dd5e41a8687a9716a2e8926bdfa141266b12942ec1",
      [66] = "2f4b4c41017a2ddd1cc8cd75478a82e9452e445d4242f09782535376d6f4ba50",
      [67] = "b777b86e005807a446ead00986fcbf3bdd6c022524deabf017eeb3f0c30b6eed",
      [68] = "777da331f60c793f582e4ca33223778218ddfd241981f15be5886171fb8301b5",
      [69] = "06ed0c4cbf7d2b38de5f01eab2d2cd552d9cb87f97b714b96bb7a9d1b6117c6d",
      [70] = "e82223344d5f3c024514cfbe6d478b5df98bb878f34d7a07e7b064fa7fa91946"
   }
   for zeroes, expected in pairs(zero_line_digests) do
      assert(sha256(("0"):rep(zeroes).."\n") == expected)
   end

   -- "aa...a"
   assert(sha256(("a"):rep(55)) == "9f4390f8d30c2dd92ec9f095b65e2b9ae9b0a925a5258e241c9f1e910f734318")
   assert(sha256(("a"):rep(56)) == "b35439a4ac6f0948b6d6f9e3c6af0f5f590ce20f1bde7090ef7970686ec6738a")

   -- "aa...a\n" in chunk-by-chunk mode, one character per chunk
   local feed_a = sha256()
   for _ = 1, 65 do
      feed_a("a")
   end
   feed_a("\n")
   assert(feed_a() == "574883a9977284a46845620eaa55c3fa8209eaa3ebffe44774b6eb2dba2cb325")

   -- Digest of string s when it is fed in pieces of at most len characters
   local function split_and_calculate_sha256(s, len)
      local feed_piece = sha256()
      for first = 1, #s, len do
         feed_piece(s:sub(first, first + len - 1))
      end
      return feed_piece()
   end

   -- "00...0\n00...0\n...00...0\n" (80 lines of 80 zeroes each) with different chunk lengths
   local lines = (("0"):rep(80).."\n"):rep(80)
   local LINES_DIGEST = "736c7a8b17e2cfd44a3267a844db1a8a3e8988d739e3e95b8dd32678fb599139"
   for _, chunk_len in ipairs{1, 2, 7, 70} do
      assert(split_and_calculate_sha256(lines, chunk_len) == LINES_DIGEST)
   end
end
-- Checks sha2.sha512 in one-shot mode against known digests.
local function test_sha512()
   local sha512 = sha2.sha512
   local abc_digest = "ddaf35a193617abacc417349ae20413112e6fa4e89a97ea20a9eeee64b55d39a2192992a274fc1a836ba3c23a3feebbd454d4423643ce80e2a9ac94fa54ca49f"
   assert(sha512("abc") == abc_digest)
   local long_message = "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu"
   local long_digest = "8e959b75dae313da8cf4f72814fc143f8f7779c6eb9f7fa17299aeadb6889018501d289e4900f7e4331b99dec4b5433ac7d329eeb6dd26545e96e55b874be909"
   assert(sha512(long_message) == long_digest)

   -- "aa...a", from 109 to 116 letters (lengths around the point where padding needs an extra block)
   local a_run_digests = {
      [109] = "0cda6b04d9466bb7f3995c16732e1347f29c23a64fe0b085fadba0995644cc5aa71587423c274c10e09518310c5f866cfaceb229fabb574219f12182eb114182",
      [110] = "c825949632e509824543f7eaf159fb6041722fce3c1cdcbb613b3d37ff107c519417baac32f8e74fe29d7f4823bf6886956603dca5354a6ed6e4a542e06b7d28",
      [111] = "fa9121c7b32b9e01733d034cfc78cbf67f926c7ed83e82200ef86818196921760b4beff48404df811b953828274461673c68d04e297b0eb7b2b4d60fc6b566a2",
      [112] = "c01d080efd492776a1c43bd23dd99d0a2e626d481e16782e75d54c2503b5dc32bd05f0f1ba33e568b88fd2d970929b719ecbb152f58f130a407c8830604b70ca",
      [113] = "55ddd8ac210a6e18ba1ee055af84c966e0dbff091c43580ae1be703bdb85da31acf6948cf5bd90c55a20e5450f22fb89bd8d0085e39f85a86cc46abbca75e24d",
      [114] = "5e9eb0e4b270d086e77eeaf3ce8b1cfc615031b8c463dc34f5c139786f274f22accb4d89e8f40d1a0c2acc84c4dc0f2bab390a9d9495493bd617ed004271bb64",
      [115] = "eaa30f93760743ac7d0a6cb8ed5ef3b30c59097bc44d0ec337344301deba9fb92b20c488d55de415f6aaed0df4925b42894b81d2e1cde89d91ec7f6cc67262b4",
      [116] = "a8bff469314a1ce0c990bb3fd539d92accb6249cc674b559bc9d3898b7a126fee597197fa42c971443470053c7d7f54b09371a59b0f7af87b1917c5347e8f8e0",
   }
   for count, expected in pairs(a_run_digests) do
      assert(sha512(("a"):rep(count)) == expected)
   end
end
-- Runs every test: the two detailed suites plus two known digests for each remaining variant.
local function all_tests_sha2()
   local SHORT_MSG = "abc"
   local LONG_MSG = "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu"

   test_sha256()
   assert(sha2.sha224(SHORT_MSG) == "23097d223405d8228642a477bda255b32aadbce4bda0b3f7e36c9da7")
   assert(sha2.sha224("abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq") == "75388b16512776cc5dba5da1fd890150b0c6455cb4f58b1952522525")

   test_sha512()
   assert(sha2.sha384(SHORT_MSG) == "cb00753f45a35e8bb5a03d699ac65007272c32ab0eded1631a8b605a43ff5bed8086072ba1e7cc2358baeca134c825a7")
   assert(sha2.sha384(LONG_MSG) == "09330c33f71147e83d192fc782cd1b4753111b173b3b05d22fa08086e3b0f712fcc7c71a557e2db966c3e9fa91746039")
   assert(sha2.sha512_224(SHORT_MSG) == "4634270f707b6a54daae7530460842e20e37ed265ceee9a43e8924aa")
   assert(sha2.sha512_224(LONG_MSG) == "23fec5bb94d60b23308192640b0c453335d664734fe40e7268674af9")
   assert(sha2.sha512_256(SHORT_MSG) == "53048e2681941ef99b2e29b76b4c7dabe4c2d0c634fc6d46e0e2f13107e7af23")
   assert(sha2.sha512_256(LONG_MSG) == "3928e184fb8690f840da3988121d31be65cb9d3ef83ee6146feac861e19b563a")

   print("All tests passed")
end

all_tests_sha2()
-- Times SHA-512 over 1 MByte of input (fed in 4 KByte pieces) and prints the median
-- CPU time of an odd number of runs.
local function benchmark()
   print("Benchmarking (calculating SHA512 of 1MByte string of letters 'a')...")
   local total_bytes = 2^20
   local piece = ("a"):rep(2^12)
   local pieces_per_run = total_bytes/#piece
   local median_pos = 2                 -- 2*median_pos-1 runs are made; the middle one is reported
   local timings = {}
   local digest
   for run = 1, 2*median_pos-1 do
      local started = os.clock()
      local feed = sha2.sha512()
      for _ = 1, pieces_per_run do
         feed(piece)
      end
      digest = feed()
      timings[run] = os.clock() - started
   end
   --print("Result = "..digest)
   -- after sorting, the middle element is the median time
   table.sort(timings)
   print('CPU seconds:', timings[median_pos])
end

benchmark() -- about 15 seconds per megabyte

Horn-Schunck optical flow implementation issue

I am trying to implement the Horn-Schunck optical flow algorithm with NumPy and OpenCV.
I follow the Horn-Schunck method as described on Wikipedia and in the original paper.
But my implementation fails on the following simple example:
Frame1:
[[ 0 0 0 0 0 0 0 0 0 0]
[ 0 255 255 0 0 0 0 0 0 0]
[ 0 255 255 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 0 0]]
Frame2:
[[ 0 0 0 0 0 0 0 0 0 0]
[ 0 0 0 255 255 0 0 0 0 0]
[ 0 0 0 255 255 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 0 0]
[ 0 0 0 0 0 0 0 0 0 0]]
This is just a small white rectangle that moves 2 pixels to the right in frame 2.
My implementation produces the following flow.
U part of the flow (I apply np.round to every part of the flow; the original values are nearly the same):
[[ 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[ 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[ 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[ 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[ 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]
V part of flow:
[[ 0. 1. 0. -1. -0. 0. 0. 0. 0. 0.]
[-0. -0. 0. 0. 0. 0. 0. 0. 0. 0.]
[-0. -1. -0. 1. 0. 0. 0. 0. 0. 0.]
[-0. -0. -0. 0. 0. 0. 0. 0. 0. 0.]
[-0. -0. -0. 0. 0. 0. 0. 0. 0. 0.]]
It looks like this flow is incorrect (because if I move every pixel of frame 2 in the direction of the corresponding flow component, I never get frame 1).
My implementation also fails on real images.
But if I move the rectangle by 1 pixel to the right (or left, up or down), my implementation produces:
U part of flow:
[[1 1 1 .....]
[1 1 1 .....]
......
[1 1 1 .....]]
V part of flow:
[[0 0 0 .....]
[0 0 0 .....]
......
[0 0 0 .....]]
I suppose that this flow is correct, because I can reconstruct frame 1 with the following procedure:
def translateBrute(img, u, v):
    """Warp img by the rounded flow: res[i, j] = img[i + v[i, j], j + u[i, j]].

    img  -- array whose first two axes are rows and columns
    u, v -- horizontal (column) and vertical (row) displacement per pixel
    Source coordinates falling outside the image are clamped to the nearest
    border pixel. Returns a new array with the dtype of img.
    """
    img = np.asarray(img)
    # np.int was removed in NumPy 1.24; the builtin int selects the same dtype
    du = np.round(u).astype(int)
    dv = np.round(v).astype(int)
    height, width = img.shape[:2]
    # Source row/column of every output pixel, clamped to the image borders
    rows = np.clip(np.arange(height)[:, None] + dv, 0, height - 1)
    cols = np.clip(np.arange(width)[None, :] + du, 0, width - 1)
    return img[rows, cols]
where takePixel is a simple function that returns the pixel intensity if the input coordinates lie inside the image, or the intensity of the nearest border pixel otherwise.
This is my implementation:
import cv2
import sys
import numpy as np
def takePixel(img, i, j):
    """Return img[i, j], with the row index i and the column index j clamped
    to the valid range so that out-of-image coordinates read the nearest
    border pixel."""
    lastRow = img.shape[0] - 1
    lastCol = img.shape[1] - 1
    row = min(max(i, 0), lastRow)
    col = min(max(j, 0), lastCol)
    return img[row, col]
#Numerical derivatives from original paper: http://people.csail.mit.edu/bkph/papers/Optical_Flow_OPT.pdf
def _padEdge(img):
    """Return img as a floating-point array with its last row and last column
    repeated once, so that index i + 1 / j + 1 at the border reads the border
    pixel itself. Integer images are converted to float32 first: differences
    of unsigned pixels would otherwise wrap around."""
    arr = np.asarray(img)
    if not np.issubdtype(arr.dtype, np.floating):
        arr = arr.astype(np.float32)
    if arr.size == 0:
        # np.pad cannot replicate the edge of an empty axis
        return np.zeros(tuple(n + 1 for n in arr.shape[:2]) + arr.shape[2:], dtype=arr.dtype)
    padWidth = ((0, 1), (0, 1)) + ((0, 0),) * (arr.ndim - 2)
    return np.pad(arr, padWidth, mode='edge')

def xDer(img1, img2):
    """Horizontal (column-direction) derivative: mean of the four forward
    differences along j taken in rows i and i + 1 of both frames."""
    p1, p2 = _padEdge(img1), _padEdge(img2)
    total = ((p1[:-1, 1:] - p1[:-1, :-1]) + (p1[1:, 1:] - p1[1:, :-1])
             + (p2[:-1, 1:] - p2[:-1, :-1]) + (p2[1:, 1:] - p2[1:, :-1]))
    return (total / 4.0).astype(p1.dtype)

def yDer(img1, img2):
    """Vertical (row-direction) derivative: mean of the four forward
    differences along i taken in columns j and j + 1 of both frames."""
    p1, p2 = _padEdge(img1), _padEdge(img2)
    total = ((p1[1:, :-1] - p1[:-1, :-1]) + (p1[1:, 1:] - p1[:-1, 1:])
             + (p2[1:, :-1] - p2[:-1, :-1]) + (p2[1:, 1:] - p2[:-1, 1:]))
    return (total / 4.0).astype(p1.dtype)

def tDer(img, img2):
    """Temporal derivative: mean of img2 - img over the 2x2 window whose
    top-left corner is the pixel itself."""
    p1, p2 = _padEdge(img), _padEdge(img2)
    diff = p2 - p1
    total = diff[:-1, :-1] + diff[:-1, 1:] + diff[1:, :-1] + diff[1:, 1:]
    return (total / 4.0).astype(p1.dtype)
# Weights of the neighbourhood average: 0.16666667 (about 1/6) for the four direct
# neighbours, 0.08333333 (about 1/12) for the four diagonal ones, 0 for the centre pixel.
averageKernel = np.array(
    [
        [0.08333333, 0.16666667, 0.08333333],
        [0.16666667, 0.0, 0.16666667],
        [0.08333333, 0.16666667, 0.08333333],
    ],
    dtype=np.float32,
)

def average(img):
    """Weighted average of the neighbours of every pixel of img (the pixel
    itself is excluded). cv2.filter2D is used instead of a Python loop for
    speed; ddepth -1 keeps the output depth equal to the float32 input."""
    asFloat = img.astype(np.float32)
    return cv2.filter2D(asFloat, -1, averageKernel)
def translateBrute(img, u, v):
    """Warp img by the rounded flow: res[i, j] = img[i + v[i, j], j + u[i, j]].

    img  -- array whose first two axes are rows and columns
    u, v -- horizontal (column) and vertical (row) displacement per pixel
    Source coordinates falling outside the image are clamped to the nearest
    border pixel. Returns a new array with the dtype of img.
    """
    img = np.asarray(img)
    # np.int was removed in NumPy 1.24; the builtin int selects the same dtype
    du = np.round(u).astype(int)
    dv = np.round(v).astype(int)
    height, width = img.shape[:2]
    # Source row/column of every output pixel, clamped to the image borders
    rows = np.clip(np.arange(height)[:, None] + dv, 0, height - 1)
    cols = np.clip(np.arange(width)[None, :] + du, 0, width - 1)
    return img[rows, cols]
#Core of algorithm. Iterative scheme from wiki: https://en.wikipedia.org/wiki/Horn%E2%80%93Schunck_method#Mathematical_details
def hornShunckFlow(img1, img2, alpha):
img1 = img1.astype(np.float32)
img2 = img2.astype(np.float32)
Idx = xDer(img1, img2)
Idy = yDer(img1, img2)
Idt = tDer(img1, img2)
u = np.zeros_like(img1)
v = np.zeros_like(img1)
#100 iterations enough for small example
for iteration in xrange(100):
u0 = np.copy(u)
v0 = np.copy(v)
uAvg = average(u0)
vAvg = average(v0)
# '*', '+', '/' operations in numpy works component-wise
u = uAvg - 1.0/(alpha**2 + Idx**2 + Idy**2) * Idx * (Idx * uAvg + Idy * vAvg + Idt)
v = vAvg - 1.0/(alpha**2 + Idx**2 + Idy**2) * Idy * (Idx * uAvg + Idy * vAvg + Idt)
if iteration % 10 == 0:
print 'iteration', iteration, np.linalg.norm(u - u0) + np.linalg.norm(v - v0)
return u, v
if __name__ == '__main__':
img1c = cv2.imread(sys.argv[1])
img2c = cv2.imread(sys.argv[2])
img1g = cv2.cvtColor(img1c, cv2.COLOR_BGR2GRAY)
img2g = cv2.cvtColor(img2c, cv2.COLOR_BGR2GRAY)
u, v = hornShunckFlow(img1g, img2g, 0.1)
imgRes = translateBrute(img2g, u, v)
cv2.imwrite('res.png', imgRes)
print img1g
print translateBrute(img2g, u, v)
The optimization scheme is taken from Wikipedia and the numerical derivatives are taken from the original paper.
Does anyone have an idea why my implementation produces an incorrect flow?
I can provide any additional information if necessary.
PS Sorry for my poor english
UPD:
I implement Horn-Schunck cost function
def grad(img):
    """Return (Idx, Idy): the unnormalized Sobel derivatives of img along x
    (columns) and y (rows), at the same depth as img.

    The kernels were previously swapped relative to the names: the kernel
    whose rows change sign differentiates along the row axis (y), yet it
    produced the value returned as Idx.
    """
    sobelX = np.array([
        [-1, 0, 1],
        [-2, 0, 2],
        [-1, 0, 1]], dtype=np.float32)
    # The y kernel is the transpose of the x kernel
    sobelY = sobelX.T.copy()
    Idx = cv2.filter2D(img, -1, sobelX)
    Idy = cv2.filter2D(img, -1, sobelY)
    return Idx, Idy
def hornShunckCost(Idx, Idy, Idt, u, v, alpha):
    """Value of the Horn-Schunck cost for the flow (u, v): the sum of the
    squared linearized brightness-constancy residual plus alpha**2 times the
    sum of the squared derivatives of u and v as returned by grad.

    Sums are taken with np.sum and a float64 accumulator; the builtin sum
    iterated over rows in Python and accumulated in the arrays' own dtype.
    """
    udx, udy = grad(u)
    vdx, vdy = grad(v)
    dataTerm = np.sum((Idx*u + Idy*v + Idt)**2, dtype=np.float64)
    smoothTerm = (np.sum(udx**2, dtype=np.float64) +
                  np.sum(udy**2, dtype=np.float64) +
                  np.sum(vdx**2, dtype=np.float64) +
                  np.sum(vdy**2, dtype=np.float64))
    return dataTerm + (alpha**2)*smoothTerm
and check value of this function inside iterations
# Every 10th iteration: print how much the flow changed in this step (sum of the norms
# of the u and v updates), followed by the current value of the cost function.
if iteration % 10 == 0:
print 'iter', iteration, np.linalg.norm(u - u0) + np.linalg.norm(v - v0)
print hornShunckCost(Idx, Idy, Idt, u, v, alpha)
If I use the simple example with the rectangle moved by one pixel, everything is OK: the value of the cost function decreases at every step.
But in the example with the rectangle moved by two pixels, the value of the cost function increases at every step.
This behaviour of the algorithm is really strange.
Maybe I chose an incorrect way to calculate the cost function.
I overlooked the fact that the classic Horn-Schunck scheme uses a linearized data term, i.e. a first-order approximation of (I1(x, y) - I2(x + u(x, y), y + v(x, y))). This linearization makes the optimization easy but does not allow large displacements.
To handle large displacements there is another approach: pyramidal (coarse-to-fine) Horn-Schunck.

Stack-relative vs stack deferred addressing

What are the practical differences between s and sf in the two code samples below?
I understand that stack-relative looks like Mem[SP + OprndSpec] and stack-relative deferred looks like Mem[Mem[SP + OprndSpec]]. However, what I don't understand is how this is accomplished.
Stack deferred
BR main
a: .BLOCK 2 ;global variable #2d
b: .BLOCK 2 ;global variable #2d
;
;******* void swap (int& r, int& s)
; Exchanges the two integers whose addresses were pushed by the caller.
; The offsets below are relative to SP after "SUBSP 2,i": temp at 0, s at 4, r at 6
; (offset 2 is presumably the return address pushed by CALL).
; r and s are references, so their stack cells hold addresses:
;   "r,sf" (stack-relative deferred) accesses Mem[Mem[SP + 6]], the caller's variable;
;   "temp,s" (stack-relative) accesses Mem[SP + 0], the local cell itself.
r: .EQUATE 6 ;formal parameter #2h
s: .EQUATE 4 ;formal parameter #2h
temp: .EQUATE 0 ;local variable #2d
swap: SUBSP 2,i ;allocate #temp
LDA r,sf ;temp = r
STA temp,s
LDA s,sf ;r = s
STA r,sf
LDA temp,s ;s = temp
STA s,sf
RET2 ;deallocate #temp, pop retAddr
;
;******* void order (int& x, int& y)
; Swaps the two referenced integers when the first is greater than the second.
; No locals are allocated here: y is at SP + 2 and x at SP + 4.
; The comparison uses sf to read the VALUES the references point to.
; The pushes before CALL use plain s to copy the ADDRESSES stored in x and y,
; so that swap receives references to the same variables.
x: .EQUATE 4 ;formal parameter #2h
y: .EQUATE 2 ;formal parameter #2h
order: LDA x,sf ;if (x > y)
CPA y,sf
BRLE endIf
LDA x,s ; push x
STA -2,s
LDA y,s ; push y
STA -4,s
SUBSP 4,i ; push #r #s
CALL swap ; swap (x, y)
ADDSP 4,i ; pop #s #r
endIf: RET0 ;pop retAddr
;
;******* main ()
; Reads two integers into the globals a and b, calls order (a, b) and prints them.
; "LDA a,i" uses immediate addressing: the operand itself, i.e. the address of a,
; is loaded and then stored below SP, which is how the reference is passed.
main: STRO msg1,d ;cout << "Enter an integer: "
DECI a,d ;cin >> a
STRO msg1,d ;cout << "Enter an integer: "
DECI b,d ;cin >> b
LDA a,i ;push the address of a
STA -2,s
LDA b,i ;push the address of b
STA -4,s
SUBSP 4,i ;push #x #y
CALL order ;order (a, b)
ra1: ADDSP 4,i ;pop #y #x
STRO msg2,d ;cout << "Ordered they are: "
DECO a,d ; << a
STRO msg3,d ; << ", "
DECO b,d ; << b
CHARO '\n',i ; << endl
STOP
msg1: .ASCII "Enter an integer: \x00"
msg2: .ASCII "Ordered they are: \x00"
msg3: .ASCII ", \x00"
.END
Stack relative
BR main
;
;******* int binomCoeff (int n, int k)
; Recursive binomial coefficient: returns 1 when k == 0 or n == k, otherwise
; binomCoeff (n - 1, k) + binomCoeff (n - 1, k - 1).
; All parameters are passed by value, so only stack-relative addressing (s) is used.
; The offsets below are relative to SP after "SUBSP 4,i": y2 at 0, y1 at 2,
; k at 6, n at 8, retVal at 10 (offset 4 is presumably the return address).
retVal: .EQUATE 10 ;returned value #2d
n: .EQUATE 8 ;formal parameter #2d
k: .EQUATE 6 ;formal parameter #2d
y1: .EQUATE 2 ;local variable #2d
y2: .EQUATE 0 ;local variable #2d
binCoeff:SUBSP 4,i ;allocate #y1 #y2
if: LDA k,s ;if ((k == 0)
BREQ then
LDA n,s ;|| (n == k))
CPA k,s
BRNE else
then: LDA 1,i ;return 1
STA retVal,s
RET4 ;deallocate #y2 #y1, pop retAddr
; Arguments are stored below SP first (-2 is left free for the callee's retVal,
; -4 receives n, -6 receives k); SUBSP 6,i then moves SP down over all three cells.
else: LDA n,s ;push n - 1
SUBA 1,i
STA -4,s
LDA k,s ;push k
STA -6,s
SUBSP 6,i ;push #retVal #n #k
CALL binCoeff
ra2: ADDSP 6,i ;pop #k #n #retVal
; After ADDSP the callee's retVal cell lies just below SP, at offset -2.
LDA -2,s ;y1 = binomCoeff (n - 1, k)
STA y1,s
LDA n,s ;push n - 1
SUBA 1,i
STA -4,s
LDA k,s ;push k - 1
SUBA 1,i
STA -6,s
SUBSP 6,i ;push #retVal #n #k
CALL binCoeff
ra3: ADDSP 6,i ;pop #k #n #retVal
LDA -2,s ;y2 = binomCoeff (n - 1, k - 1)
STA y2,s
LDA y1,s ;return y1 + y2
ADDA y2,s
STA retVal,s
endIf: RET4 ;deallocate #y2 #y1, pop retAddr
;
;******* main ()
; Prints the value of binCoeff (3, 1).
; The two arguments are stored at -4 and -6 (the cell at -2 is left for the return
; value), SP is lowered over all three cells for the call, and after ADDSP the
; result is read from offset -2.
main: STRO msg,d ;cout << "binCoeff (3, 1) = "
LDA 3,i ;push 3
STA -4,s
LDA 1,i ;push 1
STA -6,s
SUBSP 6,i ;push #retVal #n #k
CALL binCoeff
ra1: ADDSP 6,i ;pop #k #n #retVal
DECO -2,s ;<< binCoeff (3, 1)
CHARO '\n',i ;cout << endl
STOP
msg: .ASCII "binCoeff (3, 1) = \x00"
.END
Your question seems simple and has little to do with code other than they presumably contain examples.
(I'm not familiar with this instruction set, but have been coding a long time in assembly.)
According to the definition you provided, "stack relative" means taking an item from memory at a location determined by the stack pointer, plus a constant offset presumably embedded in an instruction. This is called indexed addressing by most folk, with the special note that it is stack-pointer indexed.
"Deferred" (an old term) usually means "indirect through a memory location" and your definition of that is consistent with this idea: find the "stack relative" location, read that, and use that value as the memory location to fetch.

Resources