Use of Xilinx_Out32 for specific nibble set - xilinx

Is there a built-in Xilinx function that sets or clears one specific nibble of a 32-bit AXI-Lite memory-mapped register without disturbing the other nibbles?
Example: -
Addr || Data
0x01 || 0x00110011
0x01 || 0x0"1"110011 - only setting of the second nibble from the MSB position
Thank you

Take a look at xil_io.h. There is no such function available. So you have to write this function. Please take a look at this untested example.
/**
 * Replace one 4-bit nibble of a 32-bit memory-mapped register, leaving the
 * other seven nibbles unchanged.
 *
 * @param Addr   Address of the 32-bit register.
 * @param Nibble Nibble index: 0 selects bits 3:0, 7 selects bits 31:28.
 *               Only the low three bits are used, so the shift can never
 *               reach or exceed the register width.
 * @param Value  New contents of the nibble. Only the low four bits are used;
 *               0x0 clears the nibble and 0xF sets every bit in it.
 *
 * This is a read-modify-write made of one Xil_In32() and one Xil_Out32();
 * nothing here prevents another writer from touching the register between
 * the two accesses.
 */
static INLINE void Xil_OutNibble32(UINTPTR Addr, u8 Nibble, u8 Value)
{
	/* Each nibble is four bits wide, so the bit offset is index * 4. */
	const u32 Shift = ((u32)Nibble & 0x7U) * 4U;
	/* Unsigned mask: shifting a signed 0x0F into bit 31 would be undefined. */
	const u32 Mask = (u32)0xFU << Shift;
	u32 Reg = Xil_In32(Addr);

	Reg = (Reg & ~Mask) | (((u32)Value & 0xFU) << Shift);
	Xil_Out32(Addr, Reg);
}

Related

writing all 1s or 0s to 23k640 SRAM

Hi please find my code below, I am trying to write to SRAM. please help
my code below reads the output from a cell but i can't write to that cell
CS: pin 12
MOSI: pin 8
MISO: pin 10
SCK: pin 9
*/
#include <SPI.h>
// SRAM opcodes: the first byte of every SPI transaction (binary value in the trailing comment).
#define RDSRAM 5 //00000101 - not used in the visible code; presumably "read status register", confirm against the datasheet
#define WRSRAM 1 //00000001 - not used in the visible code; presumably "write status register", confirm against the datasheet
#define READ 3 //00000011 - sent by Spi23K640Rd8 before the address bytes
#define WRITE 2 //00000010 - sent by Spi23K640Wr8 before the address bytes
int *ptr; // pointed at codeAddr in setup(); never dereferenced in the visible code
int CS = 12; // chip-select pin of the SRAM, driven manually around each transfer
int CSS = 8; // second pin that setup() drives HIGH as an output. NOTE(review): the pin list in the header names pin 8 as MOSI - confirm the wiring
char buf [90]; // raw text line received over Serial
int response_pair; // NOTE(review): shadowed by locals of the same name in setup() and loop()
int entryval; // challenge value parsed (as octal) from buf
int codeAddr = 545; // SRAM address whose contents are used as the expected response
char s [90]; // formatted diagnostic message printed after each challenge
//char value = *(char*)0x5C;
// Read a single byte from the SRAM.
// Sends the READ opcode followed by the low 16 bits of `address`
// (high byte first), then clocks out one dummy byte to receive the data.
// CS is held low for the whole transaction.
uint8_t Spi23K640Rd8(uint32_t address){
  const uint8_t addr_hi = (uint8_t)(address >> 8) & 0xff;
  const uint8_t addr_lo = (uint8_t)address;

  digitalWrite(CS, LOW);
  SPI.transfer(READ);
  SPI.transfer(addr_hi);
  SPI.transfer(addr_lo);
  const uint8_t received = SPI.transfer(0x00);
  digitalWrite(CS, HIGH);

  return received;
}
void Spi23K640Wr8(uint32_t address, uint8_t data_byte)
{
SPI.transfer(WRITE);
SPI.transfer((uint8_t)(address >> 16) & 0xff);
SPI.transfer((uint8_t)(address >> 8) & 0xff);
SPI.transfer((uint8_t)address);
SPI.transfer(data_byte);
}
// One-time initialisation: configure the pins, start Serial and SPI, then
// write a test pattern (low byte of the address) to every SRAM location and
// print what is read back, 32 comma-separated values per line.
void setup(void) {
  // 64 Kbit SRAM = 65536 / 8 = 8192 bytes, i.e. valid addresses 0..8191.
  const uint16_t SRAM_BYTES = 8192;

  ptr = &codeAddr;

  pinMode(CSS, OUTPUT);
  digitalWrite(CSS, HIGH);
  pinMode(CS, OUTPUT);
  // Keep the SRAM deselected until the first transfer; pinMode alone leaves
  // the pin low, which would hold the chip selected.
  digitalWrite(CS, HIGH);

  Serial.begin(9600);
  // Wait for the port before printing anything, otherwise the dump below can
  // be lost on boards with native USB.
  while (!Serial) ;
  delay(2500);
  SPI.begin();

  for (uint16_t i = 0; i < SRAM_BYTES; i++) {
    Spi23K640Wr8(i, (uint8_t)i);
    uint8_t value = Spi23K640Rd8(i);
    Serial.print(value, DEC);
    if ((i % 32) == 31) {
      // End of a row of 32 values: terminate the line without printing the
      // value a second time.
      Serial.println();
    } else {
      Serial.print(",");
    }
  }

  Serial.println ("Enter Challenge");
  delay(500);
}
// Main loop: read the expected response from the SRAM, and when a line has
// arrived on Serial, parse it as an octal number and compare the two.
void loop() {
  int response_pair = Spi23K640Rd8 (codeAddr);

  if (!Serial.available ()) {
    return;
  }

  // Leave room for the terminator; readBytesUntil never writes more than the
  // length it is given.
  size_t n = Serial.readBytesUntil ('\n', buf, sizeof (buf) - 1);
  buf [n] = '\0';

  // %o stores through an unsigned int pointer, so parse into a temporary and
  // only update entryval when a number was actually read. A line that does
  // not parse is treated as a wrong challenge instead of silently reusing the
  // previous entry.
  unsigned int parsed = 0;
  bool have_entry = (sscanf (buf, "%o", &parsed) == 1);
  if (have_entry) {
    entryval = (int)parsed;
  }

  // buf alone can be almost as long as s, so the message must be bounded.
  snprintf (s, sizeof (s), " buf %s, response_pair %o entryval %o",
            buf, (unsigned int)response_pair, (unsigned int)entryval);

  if (have_entry && entryval == response_pair) {
    Serial.println ("RESPONSE PAIR MATCHES ");
    Serial.println ("loading address......");
    Serial.print ("CRP address output = ");
    Serial.println (Spi23K640Rd8(codeAddr), DEC); // value stored at codeAddr
    Serial.println ("Authenticate Chip");
    Serial.println (s);
  } else {
    Serial.println ("INTRUDER ALERT!!!Wrong challenge");
    Serial.println (s);
    // NOTE(review): this pauses for as many milliseconds as bytes were
    // received; kept as in the original, confirm it is intended.
    delay (n);
  }

  Serial.println();
  Serial.println ("Enter Another Challenge"); // start the process again
}
i was able to read the power up state but haven't had any luck writing to the SRAM cells
any suggestions will be appreciated. i am on a tight schedule.
Disregard the text below.
We have established that In order to evaluate the properties of the SRAM as a PUF, we perform a number of specifically selected tests to investigate the behaviour of the start-up values of the SRAM memory
• The technique can be viewed as an attempt to read multiple cells in a column at the same time, creating contention that is resolved according to process variation
• An authentication challenge is issued to the array of SRAM cells by activating two or more wordlines concurrently
• The response is simply the value that the SRAM produces from a read operation when the challenge condition is applied
• The number of challenges that can be applied to the array of SRAM cells grows exponentially with the number of SRAM rows, and these challenges can be applied at any time without power cycling
• This provides an array of different responses on different chips; the challenges are applied to SRAM cells arranged in rows and columns, where the SRAM cells in each row of the array share a wordline
• SRAM cells in each column in the array share common is a graph illustrating the number of unbiased bit lines
The CS line is not asserted during the write operation, and the SRAM uses 16 and not 24-bit addresses. You can try changing your read and write functions to something like this:
// Read one byte from the SRAM at a 16-bit address.
// Transaction: CS low, READ opcode, address high byte, address low byte,
// one dummy byte clocked out while the data byte is clocked in, CS high.
uint8_t Spi23K640Rd8(uint16_t address) { // <-- change from uint32_t to uint16_t
  const uint8_t addr_hi = (uint8_t)(address >> 8) & 0xff;
  const uint8_t addr_lo = (uint8_t)address;

  digitalWrite(CS, LOW);
  SPI.transfer(READ);
  SPI.transfer(addr_hi);
  SPI.transfer(addr_lo);
  const uint8_t data = SPI.transfer(0x00);
  digitalWrite(CS, HIGH);

  return data;
}
// Write one byte to the SRAM at a 16-bit address.
// Transaction: CS low, WRITE opcode, address high byte, address low byte,
// data byte, CS high. No third address byte is sent.
void Spi23K640Wr8(uint16_t address, uint8_t data_byte) { // <-- change from uint32_t to uint16_t
  const uint8_t addr_hi = (uint8_t)(address >> 8) & 0xff;
  const uint8_t addr_lo = (uint8_t)address;

  digitalWrite(CS, LOW);   // select the device for the whole transaction
  SPI.transfer(WRITE);
  SPI.transfer(addr_hi);
  SPI.transfer(addr_lo);
  SPI.transfer(data_byte);
  digitalWrite(CS, HIGH);  // release the device
}
Note: You should also consider renaming these functions to make your code easier to read.
How about replacing
uint8_t Spi23K640Rd8(uint16_t address);
void Spi23K640Wr8(uint16_t address, uint8_t data_byte);
With
uint8_t SRAM_23K640_ReadByte(uint16_t address);
void SRAM_23K640_WriteByte(uint16_t address, uint8_t data_byte);
Or whatever you see fit. Keep in mind that our eyes and brain have a much easier time reading shorter, pronounceable words. When the brain is too busy reading long mumbo jumbo, thinking about other things, like what the code does, becomes more difficult.

How to make an operation similar to _mm_extract_epi8 with non-immediate input?

What I want is extracting a value from vector using a variable scalar index.
Like _mm_extract_epi8 / _mm256_extract_epi8 but with non-immediate input.
(There are some results in the vector, the one with the given index is found out to be the true result, the rest are discarded)
Especially, if index is in a GPR, the easiest way is probably to store val to memory and then movzx it into another GPR. Sample implementation using C:
/* Return byte number `index` of the 256-bit vector `val`, where `index` is a
   run-time value.

   The vector is copied into a union and read back as a byte array, so the
   selection is an ordinary array access.

   Only the low five bits of `index` are used. Any int, including negative
   values and values above 31, therefore selects one of the 32 bytes instead
   of reading outside the array. */
uint8_t extract_epu8var(__m256i val, int index) {
    union {
        __m256i m256;
        uint8_t array[32];
    } tmp;

    tmp.m256 = val;
    return tmp.array[(unsigned)index & 31u];
}
Godbolt translation (note that a lot of overhead happens for stack alignment -- if you don't have an aligned temporary storage area, you could just vmovdqu instead of vmovdqa): https://godbolt.org/z/Gj6Eadq9r
So far the best option seems to be using _mm_shuffle_epi8 for SSE:
/* Return byte number `index` of the 128-bit vector `val`, where `index` is a
   run-time value.

   `index` is moved into the low 32 bits of a vector and used as the byte
   shuffle control, so byte 0 of the shuffled result is the selected byte;
   that byte is then moved back to a general-purpose register.
   Assumes 0 <= index <= 15 -- TODO confirm the behavior wanted for other
   values. */
uint8_t extract_epu8var(__m128i val, int index) {
    const __m128i selector = _mm_cvtsi32_si128(index);
    const __m128i shuffled = _mm_shuffle_epi8(val, selector);
    const int low_dword = _mm_cvtsi128_si32(shuffled);

    return (uint8_t)low_dword;
}
Unfortunately this does not scale well to AVX: vpshufb does not shuffle across the 128-bit lanes. There is a cross-lane shuffle, _mm256_permutevar8x32_epi32, but the resulting code is more complicated:
/* Return byte number `index` of the 256-bit vector `val`, where `index` is a
   run-time value.

   The index is split in two: the upper part (index >> 2) selects one of the
   eight 32-bit elements, which a cross-lane permute moves into element 0;
   the lower two bits select the byte inside that element, which is brought
   down with a right shift of 0, 8, 16 or 24 bits.
   Assumes 0 <= index <= 31 -- TODO confirm the behavior wanted for other
   values. */
uint8_t extract_epu8var(__m256i val, int index) {
    const int byte_in_dword = index & 0x3;
    const int dword_index = (index >> 2);

    const __m256i control =
        _mm256_zextsi128_si256(_mm_cvtsi32_si128(dword_index));
    const __m256i permuted = _mm256_permutevar8x32_epi32(val, control);
    const int dword = _mm256_cvtsi256_si32(permuted);

    return (uint8_t)(dword >> (byte_in_dword << 3));
}

writing to flash memory dspic33e

I have some questions regarding the flash memory with a dspic33ep512mu810.
I'm aware of how it should be done:
set all the register for address, latches, etc. Then do the sequence to start the write procedure or call the builtins function.
But I find that there is some small difference between what I'm experiencing and what is in the DOC.
when writing the flash in WORD mode. In the DOC it is pretty straightforward. Following is the example code in the DOC
int varWord1L = 0xXXXX;
int varWord1H = 0x00XX;
int varWord2L = 0xXXXX;
int varWord2H = 0x00XX;
int TargetWriteAddressL; // bits<15:0>
int TargetWriteAddressH; // bits<22:16>
NVMCON = 0x4001; // Set WREN and word program mode
TBLPAG = 0xFA; // write latch upper address
NVMADR = TargetWriteAddressL; // set target write address
NVMADRU = TargetWriteAddressH;
__builtin_tblwtl(0,varWord1L); // load write latches
__builtin_tblwth(0,varWord1H);
__builtin_tblwtl(0x2,varWord2L);
__builtin_tblwth(0x2,varWord2H);
__builtin_disi(5); // Disable interrupts for NVM unlock sequence
__builtin_write_NVM(); // initiate write
while(NVMCONbits.WR == 1);
But that code doesn't work depending on the address where I want to write. I found a fix to write one WORD but I can't write 2 WORD where I want. I store everything in the aux memory so the upper address(NVMADRU) is always 0x7F for me. The NVMADR is the address I can change. What I'm seeing is that if the address where I want to write modulo 4 is not 0 then I have to put my value in the 2 last latches, otherwise I have to put the value in the first latches.
If address modulo 4 is not zero, it doesn't work like the doc code(above). The value that will be at the address will be what is in the second set of latches.
I fixed it for writing only one word at a time like this:
// Load both write-latch pairs (offsets 0 and 2) before a word-programming
// operation. The word to be written goes in one pair; the other pair is
// filled with 0xFFFF / 0x00FF (all bits set) -- presumably so that the
// neighbouring program word is left unchanged; confirm against the device's
// flash programming documentation.
if(Address % 4)
{
// Address is not a multiple of 4: the value goes in the second latch pair.
__builtin_tblwtl(0, 0xFFFF);
__builtin_tblwth(0, 0x00FF);
__builtin_tblwtl(2, ValueL);
__builtin_tblwth(2, ValueH);
}
else
{
// Address is a multiple of 4: the value goes in the first latch pair.
__builtin_tblwtl(0, ValueL);
__builtin_tblwth(0, ValueH);
__builtin_tblwtl(2, 0xFFFF);
__builtin_tblwth(2, 0x00FF);
}
I want to know why I'm seeing this behavior?
2)I also want to write a full row.
That also doesn't seem to work for me and I don't know why because I'm doing what is in the DOC.
I tried a simple write row code and at the end I just read back the first 3 or 4 element that I wrote to see if it works:
NVMCON = 0x4002; //set for row programming
TBLPAG = 0x00FA; //set address for the write latches
NVMADRU = 0x007F; //upper address of the aux memory
NVMADR = 0xE7FA;
int latchoffset;
latchoffset = 0;
__builtin_tblwtl(latchoffset, 0);
__builtin_tblwth(latchoffset, 0); //current = 0, available = 1
latchoffset+=2;
__builtin_tblwtl(latchoffset, 1);
__builtin_tblwth(latchoffset, 1); //current = 0, available = 1
latchoffset+=2;
.
. all the way to 127(I know I could have done it in a loop)
.
__builtin_tblwtl(latchoffset, 127);
__builtin_tblwth(latchoffset, 127);
INTCON2bits.GIE = 0; //stop interrupt
__builtin_write_NVM();
while(NVMCONbits.WR == 1);
INTCON2bits.GIE = 1; //start interrupt
int testaddress;
testaddress = 0xE7FA;
status = NVMemReadIntH(testaddress);
status = NVMemReadIntL(testaddress);
testaddress += 2;
status = NVMemReadIntH(testaddress);
status = NVMemReadIntL(testaddress);
testaddress += 2;
status = NVMemReadIntH(testaddress);
status = NVMemReadIntL(testaddress);
testaddress += 2;
status = NVMemReadIntH(testaddress);
status = NVMemReadIntL(testaddress);
What I see is that the value that is stored in the address 0xE7FA is 125, in 0xE7FC is 126 and in 0xE7FE is 127. And the rest are all 0xFFFF.
Why is it taking only the last 3 latches and write them in the first 3 address?
Thanks in advance for your help people.
The dsPIC33 program memory space is treated as 24 bits wide, it is
more appropriate to think of each address of the program memory as a
lower and upper word, with the upper byte of the upper word being
unimplemented
(dsPIC33EPXXX datasheet)
There is a phantom byte every two program words.
Your code
if(Address % 4)
{
__builtin_tblwtl(0, 0xFFFF);
__builtin_tblwth(0, 0x00FF);
__builtin_tblwtl(2, ValueL);
__builtin_tblwth(2, ValueH);
}
else
{
__builtin_tblwtl(0, ValueL);
__builtin_tblwth(0, ValueH);
__builtin_tblwtl(2, 0xFFFF);
__builtin_tblwth(2, 0x00FF);
}
...will be fine for writing a bootloader if generating values from a valid Intel HEX file, but doesn't make it simple for storing data structures because the phantom byte is not taken into account.
If you create a uint32_t variable and look at the compiled HEX file, you'll notice that it in fact uses up the least significant words of two 24-bit program words. I.e. the 32-bit value is placed into a 64-bit range but only 48-bits out of the 64-bits are programmable, the others are phantom bytes (or zeros). Leaving three bytes per address modulo of 4 that are actually programmable.
What I tend to do if writing data is to keep everything 32-bit aligned and do the same as the compiler does.
Writing:
UINT32 value = ....;
:
__builtin_tblwtl(0, value.word.word_L); // least significant word of 32-bit value placed here
__builtin_tblwth(0, 0x00); // phantom byte + unused byte
__builtin_tblwtl(2, value.word.word_H); // most significant word of 32-bit value placed here
__builtin_tblwth(2, 0x00); // phantom byte + unused byte
Reading:
UINT32 *value
:
value->word.word_L = __builtin_tblrdl(offset);
value->word.word_H = __builtin_tblrdl(offset+2);
UINT32 structure:
// A 32-bit value that can be accessed as a whole, as two 16-bit words, or as
// four individual bytes; all three views share the same storage.
// NOTE(review): word_L is the first 16 bits in memory and word_H the next 16,
// so they are the least/most significant halves only on a little-endian
// target -- confirm for the toolchain in use.
typedef union _UINT32 {
uint32_t val32; // the whole 32-bit value
struct {
uint16_t word_L; // first 16-bit word in memory
uint16_t word_H; // second 16-bit word in memory
} word;
uint8_t bytes[4]; // the same storage, one byte at a time
} UINT32;

Any suggestions about how to implement a BASIC language parser/interpreter?

I've been trying to implement a BASIC language interpreter (in C/C++) but I haven't found any book or (thorough) article which explains the process of parsing the language constructs. Some commands are rather complex and hard to parse, especially conditionals and loops, such as IF-THEN-ELSE and FOR-STEP-NEXT, because they can mix variables with constants and entire expressions and code and everything else, for example:
10 IF X = Y + Z THEN GOTO 20 ELSE GOSUB P
20 FOR A = 10 TO B STEP -C : PRINT C$ : PRINT WHATEVER
30 NEXT A
It seems like a nightmare to be able to parse something like that and make it work. And to make things worse, programs written in BASIC can easily be a tangled mess. That's why I need some advice, read some book or whatever to make my mind clear about this subject. What can you suggest?
You've picked a great project - writing interpreters can be lots of fun!
But first, what do we even mean by an interpreter? There are different types of interpreters.
There is the pure interpreter, where you simply interpret each language element as you find it. These are the easiest to write, and the slowest.
A step up, would be to convert each language element into some sort of internal form, and then interpret that. Still pretty easy to write.
The next step, would be to actually parse the language, and generate a syntax tree, and then interpret that. This is somewhat harder to write, but once you've done it a few times, it becomes pretty easy.
Once you have a syntax tree, you can fairly easily generate code for a custom stack virtual machine. A much harder project is to generate code for an existing virtual machine, such as the JVM or CLR.
In programming, like most engineering endeavors, careful planning greatly helps, especially with complicated projects.
So the first step is to decide which type of interpreter you wish to write. If you have not read any of a number of compiler books (e.g., I always recommend Niklaus Wirth's "Compiler Construction" as one of the best introductions to the subject, and is now freely available on the web in PDF form), I would recommend that you go with the pure interpreter.
But you still need to do some additional planning. You need to rigorously define what it is you are going to be interpreting. EBNF is great for this. For a gentle introduction to EBNF, read the first three parts of A Simple Compiler at http://www.semware.com/html/compiler.html. It is written at the high school level, and should be easy to digest. Yes, I tried it on my kids first :-)
Once you have defined what it is you want to be interpreting, you are ready to write your interpreter.
Abstractly, your simple interpreter will be divided into a scanner (technically, a lexical analyzer), a parser, and an evaluator. In the simple pure interpreter case, the parser and evaluator will be combined.
Scanners are easy to write, and easy to test, so we won't spend any time on them. See the aforementioned link for info on crafting a simple scanner.
Let's (for example) define your goto statement:
gotostmt -> 'goto' integer
integer -> [0-9]+
This tells us that when we see the token 'goto' (as delivered by the scanner), the only thing that can follow is an integer. And an integer is simply a string of digits.
In pseudo code, we might handle this as so:
(token - is the current token, which is the current element just returned via the scanner)
loop
if token == "goto"
goto_stmt()
elseif token == "gosub"
gosub_stmt()
elseif token == .....
endloop
proc goto_stmt()
expect("goto") -- redundant, but used to skip over goto
if is_numeric(token)
--now, somehow set the instruction pointer at the requested line
else
error("expecting a line number, found '%s'\n", token)
end
end
proc expect(s)
if s == token
getsym()
return true
end
error("Expecting '%s', found: '%s'\n", s, token)
end
See how simple it is? Really, the only hard thing to figure out in a simple interpreter is the handling of expressions. A good recipe for handling those is at: http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm Combined with the aforementioned references, you should have enough to handle the sort of expressions you would encounter in BASIC.
Ok, time for a concrete example. This is from a larger 'pure interpreter', that handles a enhanced version of Tiny BASIC (but big enough to run Tiny Star Trek :-) )
/*------------------------------------------------------------------------
Simple example, pure interpreter, only supports 'goto'
------------------------------------------------------------------------*/
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <setjmp.h>
#include <ctype.h>
/* Boolean constants and interpreter limits: number of program lines and
   characters per line. */
enum {False=0, True=1, Max_Lines=300, Max_Len=130};
char *text[Max_Lines+1]; /* array of program lines; main() reads the line just typed into text[0] */
int textp; /* used by scanner - ptr in current line */
char tok[Max_Len+1]; /* the current token; empty string at end of line */
int cur_line; /* the current line number */
int ch; /* current character */
int num; /* populated if token is an integer */
jmp_buf restart; /* set in main() before the prompt loop; error() jumps back to it */
/* Report an error and abandon the current command.

   Formats the message printf-style, prints it followed by a newline, and
   then jumps back to the point saved in `restart`, so this function never
   actually returns to its caller.

   fmt  printf-style format string, followed by its arguments.

   The int return type only exists so that calls can be used inside
   expressions (e.g. `cond ? True : error(...)`).

   The message is formatted with a bounded write: a long token cannot
   overflow the local buffer, it is truncated instead. */
int error(const char *fmt, ...) {
    char msg[200];
    va_list args;

    va_start(args, fmt);
    vsnprintf(msg, sizeof msg, fmt, args);
    va_end(args);

    printf("%s\n", msg);
    longjmp(restart, 1);
    return 0; /* not reached; keeps compilers quiet about a missing return */
}
/* Return 1 when the current character ends the line (NUL or newline),
   0 otherwise. */
int is_eol(void) {
    switch (ch) {
    case '\0':
    case '\n':
        return 1;
    default:
        return 0;
    }
}
/* Load the character at the scan position of the current line into `ch`.
   The scan position only advances while the line has not ended, so repeated
   calls at end of line keep returning the terminator. */
void get_ch(void) {
    const char *line = text[cur_line];

    ch = line[textp];
    if (is_eol())
        return;
    textp++;
}
void getsym(void) {
char *cp = tok;
while (ch <= ' ') {
if (is_eol()) {
*cp = '\0';
return;
}
get_ch();
}
if (isalpha(ch)) {
for (; !is_eol() && isalpha(ch); get_ch()) {
*cp++ = (char)ch;
}
*cp = '\0';
} else if (isdigit(ch)) {
for (; !is_eol() && isdigit(ch); get_ch()) {
*cp++ = (char)ch;
}
*cp = '\0';
num = atoi(tok);
} else
error("What? '%c'", ch);
}
/* Start scanning program line `n` from its first character and fetch its
   first token. `ch` is primed with a space so that getsym() begins by
   reading the first real character. */
void init_getsym(const int n) {
    textp = 0;
    cur_line = n;
    ch = ' ';
    getsym();
}
/* Discard the rest of the current line: clear the current token and read
   characters until the end of the line is reached. */
void skip_to_eol(void) {
    for (tok[0] = '\0'; !is_eol(); get_ch())
        ;
}
/* If the current token equals `s`, consume it (advance to the next token)
   and return True; otherwise leave the token in place and return False. */
int accept(const char s[]) {
    if (strcmp(tok, s) != 0)
        return False;

    getsym();
    return True;
}
int expect(const char s[]) {
return accept(s) ? True : error("Expecting '%s', found: %s", s, tok);
}
/* Check that `num` is a usable program line number (1..Max_Lines).
   Returns True when it is; otherwise reports the valid range through
   error(). */
int valid_line_num(void) {
    if (num < 1 || num > Max_Lines)
        return error("Line number must be between 1 and %d", Max_Lines);

    return True;
}
/* Transfer execution to the program line whose number is in `num`, provided
   that number passes valid_line_num(). */
void goto_line(void) {
    if (!valid_line_num())
        return;

    init_getsym(num);
}
/* Execute a `goto` statement. The `goto` keyword has already been consumed,
   so the current token must be the target line number; anything else is
   reported through error(). */
void goto_stmt(void) {
    if (!isdigit(tok[0])) {
        error("Expecting line number, found: '%s'", tok);
        return;
    }

    goto_line();
}
void do_cmd(void) {
for (;;) {
while (tok[0] == '\0') {
if (cur_line == 0 || cur_line >= Max_Lines)
return;
init_getsym(cur_line + 1);
}
if (accept("bye")) {
printf("That's all folks!\n");
exit(0);
} else if (accept("run")) {
init_getsym(1);
} else if (accept("goto")) {
goto_stmt();
} else {
error("Unknown token '%s' at line %d", tok, cur_line); return;
}
}
}
/* Entry point: allocate storage for every program line, then run the
   read-eval loop.

   Each input line is read into text[0]. A line that starts with a number is
   stored as that program line (the text after the number is copied); any
   other line is executed immediately as a command. error() jumps back to
   the setjmp() point, which restarts the prompt loop.

   Returns 0 when input ends (end of file or read error) and EXIT_FAILURE if
   the line storage cannot be allocated. */
int main(void) {
    int i;

    for (i = 0; i <= Max_Lines; i++) {
        text[i] = calloc(Max_Len + 1, sizeof *text[i]);
        if (text[i] == NULL) {
            fprintf(stderr, "out of memory\n");
            return EXIT_FAILURE;
        }
    }

    setjmp(restart);
    for (;;) {
        printf("> ");
        /* Once fgets() fails, retrying cannot succeed: at end of input it
           would spin forever, so leave instead. */
        if (fgets(text[0], Max_Len, stdin) == NULL)
            return 0;

        if (text[0][0] != '\0') {
            init_getsym(0);
            if (isdigit(tok[0])) {
                if (valid_line_num())
                    strcpy(text[num], &text[0][textp]);
            } else
                do_cmd();
        }
    }
}
Hopefully, that will be enough to get you started. Have fun!
I will certainly get beaten by telling this ...but...:
First, I am actually working on a standalone library ( as a hobby ) that is made of:
a tokenizer, building linear (flat list) of tokens from the source text and following the same sequence as the text ( lexems created from the text flow ).
A parser by hands (syntax analyse; pseudo-compiler )
There is no "pseudo-code" nor "virtual CPU/machine".
Instructions(such as 'return', 'if' 'for' 'while'... then arithemtic expressions ) are represented by a base c++-struct/class and is the object itself. The base object, I name it atom, have a virtual method called "eval", among other common members, that is the "execution/branch" also by itself. So no matter I have an 'if' statement with its possible branchings ( single statement or bloc of statements/instructions ) as true or false condition, it will be called from the base virtual atom::eval() ... and so on for everything that is an atom.
Even 'objects' such as variables are 'atom'. 'eval()' will simply return its value from a variant container held by the atom itself ( pointer, refering to the 'local' variant instance (the instance variant iself) held the 'atom' or to another variant held by an atom that is created in a given 'bloc/stack'. So 'atom' are 'inplace' instructions/objects.
As of now, as an example, chunk of not really meaningful 'code' as below just works:
r = 5!; // 5! : (factorial of 5 )
Response = 1 + 4 - 6 * --r * ((3+5)*(3-4) * 78);
if (Response != 1){ /* '<>' also is not equal op. */
return r^3;
}
else{
return 0;
}
Expressions ( arithemtics ) are built into binary tree expression:
A = b+c; =>
=
/ \
A +
/ \
b c
So the 'instruction'/statement for expression like above is the tree-entry atom that in the above case, is the '=' (binary) operator.
The tree is built with atom::r0,r1,r2 :
atom 'A' :
r0
|
A
/ \
r1 r2
Regarding 'full-duplex' mecanism between c++ runtime and the 'script' library, I've made class_adaptor and adaptor<> :
ex.:
template<typename R, typename ...Args> adaptor_t<T,R, Args...>& import_method(const lstring& mname, R (T::*prop)(Args...)) { ... }
template<typename R, typename ...Args> adaptor_t<T,R, Args...>& import_property(const lstring& mname, R (T::*prop)(Args...)) { ... }
Second: I know there are plenty of tools and libs out there such as lua, boost::bind<*>, QML, JSON, etc... But in my situation, I need to create my very own [edit] 'independant' [/edit] lib for "live scripting". I was scared that my 'interpreter' could take a huge amount of RAM, but I am surprised that it is not as big as using QML,jscript or even lua :-)
Thank you :-)
Don't bother with hacking a parser together by hand. Use a parser generator. lex + yacc is the classic lexer/parser generator combination, but a Google search will reveal plenty of others.

Is it possible to have zlib read from and write to the same memory buffer?

I have a character buffer that I would like to compress in place. Right now I have it set up so there are two buffers and zlib's deflate reads from the input buffer and writes to the output buffer. Then I have to change the input buffer pointer to point to the output buffer and free the old input buffer. This seems like an unnecessary amount of allocation. Since zlib is compressing, the next_out pointer should always lag behind the next_in pointer. Anyway, I can't find enough documentation to verify this and was hoping someone had some experience with this. Thanks for your time!
It can be done, with some care. The routine below does it. Not all data is compressible, so you have to handle the case where the output data catches up with the input data. It takes a lot of incompressible data, but it can happen (see comments in code), in which case you have to allocate a buffer to temporarily hold the remaining input.
/* Compress buf[0..len-1] in place into buf[0..*max-1]. *max must be greater
than or equal to len. Return Z_OK on success, Z_BUF_ERROR if *max is not
enough output space, Z_MEM_ERROR if there is not enough memory, or
Z_STREAM_ERROR if *strm is corrupted (e.g. if it wasn't initialized or if it
was inadvertently written over). If Z_OK is returned, *max is set to the
actual size of the output. If Z_BUF_ERROR is returned, then *max is
unchanged and buf[] is filled with *max bytes of uncompressed data (which is
not all of it, but as much as would fit).
Incompressible data will require more output space than len, so max should
be sufficiently greater than len to handle that case in order to avoid a
Z_BUF_ERROR. To assure that there is enough output space, max should be
greater than or equal to the result of deflateBound(strm, len).
strm is a deflate stream structure that has already been successfully
initialized by deflateInit() or deflateInit2(). That structure can be
reused across multiple calls to deflate_inplace(). This avoids unnecessary
memory allocations and deallocations from the repeated use of deflateInit()
and deflateEnd(). */
int deflate_inplace(z_stream *strm, unsigned char *buf, unsigned len,
unsigned *max)
{
int ret; /* return code from deflate functions */
unsigned have; /* number of bytes in temp[] */
unsigned char *hold; /* allocated buffer to hold input data */
unsigned char temp[11]; /* must be large enough to hold zlib or gzip
header (if any) and one more byte -- 11
works for the worst case here, but if gzip
encoding is used and a deflateSetHeader()
call is inserted in this code after the
deflateReset(), then the 11 needs to be
increased to accommodate the resulting gzip
header size plus one */
/* initialize deflate stream and point to the input data */
ret = deflateReset(strm);
if (ret != Z_OK)
return ret;
strm->next_in = buf;
strm->avail_in = len;
/* kick start the process with a temporary output buffer -- this allows
deflate to consume a large chunk of input data in order to make room for
output data there */
/* a caller-supplied *max smaller than len is raised to len, so the code
below can rely on *max >= len */
if (*max < len)
*max = len;
strm->next_out = temp;
strm->avail_out = sizeof(temp) > *max ? *max : sizeof(temp);
ret = deflate(strm, Z_FINISH);
if (ret == Z_STREAM_ERROR)
return ret;
/* if we can, copy the temporary output data to the consumed portion of the
input buffer, and then continue to write up to the start of the consumed
input for as long as possible */
have = strm->next_out - temp;
/* room available at the front of buf: the bytes already consumed
(len - avail_in) while input remains, or all of *max once the input has
been fully consumed */
if (have <= (strm->avail_in ? len - strm->avail_in : *max)) {
memcpy(buf, temp, have);
strm->next_out = buf + have;
have = 0;
while (ret == Z_OK) {
/* output may advance only as far as the first unread input byte; with no
input left it may use everything up to buf + *max */
strm->avail_out = strm->avail_in ? strm->next_in - strm->next_out :
(buf + *max) - strm->next_out;
ret = deflate(strm, Z_FINISH);
}
/* only Z_BUF_ERROR with input still pending falls through to the
allocated-buffer path below; every other outcome is final */
if (ret != Z_BUF_ERROR || strm->avail_in == 0) {
*max = strm->next_out - buf;
return ret == Z_STREAM_END ? Z_OK : ret;
}
}
/* the output caught up with the input due to insufficiently compressible
data -- copy the remaining input data into an allocated buffer and
complete the compression from there to the now empty input buffer (this
will only occur for long incompressible streams, more than ~20 MB for
the default deflate memLevel of 8, or when *max is too small and less
than the length of the header plus one byte) */
hold = strm->zalloc(strm->opaque, strm->avail_in, 1);
if (hold == Z_NULL)
return Z_MEM_ERROR;
memcpy(hold, strm->next_in, strm->avail_in);
strm->next_in = hold;
/* temp[] output that did not fit earlier is placed at the start of buf now
that the unread input has been moved out of the way */
if (have) {
memcpy(buf, temp, have);
strm->next_out = buf + have;
}
strm->avail_out = (buf + *max) - strm->next_out;
ret = deflate(strm, Z_FINISH);
/* NOTE(review): strm->next_in still points into hold after this free; it
must be reassigned before the stream is used again (the start of this
function does so) */
strm->zfree(strm->opaque, hold);
*max = strm->next_out - buf;
/* Z_OK from this last call is reported as Z_BUF_ERROR (the stream did not
reach Z_STREAM_END within the space given); Z_STREAM_END is success */
return ret == Z_OK ? Z_BUF_ERROR : (ret == Z_STREAM_END ? Z_OK : ret);
}

Resources