www.pudn.com > bf533_encoder_mpeg4.rar > arith_encoder_mpeg4.asm
/*******************************************************************************
Copyright(c) 2000 - 2002 Analog Devices. All Rights Reserved.
Developed by Joint Development Software Application Team, IPDC, Bangalore, India
for Blackfin DSPs ( Micro Signal Architecture 1.0 specification).
By using this module you agree to the terms of the Analog Devices License
Agreement for DSP Software.
********************************************************************************
Module Name : arith_encoder_mpeg4.asm
Label Name : __arith_encoder_mpeg4
Version : 2.0
Change History :
Version Date Author Comments
2.0 01/09/2007 Arjun Tested with VDSP++4.5
Compiler 7.2.3.2
1.3 11/18/2002 Swarnalatha Tested with VDSP++ 3.0
compiler 6.2.2 on
ADSP-21535 Rev.0.2
1.2 11/13/2002 Swarnalatha Tested with VDSP++ 3.0
on ADSP-21535 Rev.0.2
1.1 03/20/2002 Raghavendra Modified to match
silicon cycle count
1.0 09/11/2001 Raghavendra Original
Description : The arithmetic coding works on the basis of recursive
probability interval sub division. With each binary decision
the current probability interval is subdivided into two
subintervals and code string is modified so that it points to
base of the probability subinterval. Arithmetic encoder uses
different probability tables for INTRA and NON-INTRA
contexts. These tables contain the probabilities for a binary
alpha pixel being equal to 0 for intra and inter shape coding.
The least probable symbol LPS is defined as the symbol with
least probability. If both probabilities are equal to
half (i.e 0x8000), then '0' symbol is considered as least
probable.
In initialization lower bound L is set to zero and range
register R is set to 0x7fffffff. In order to avoid start code
emulation, 1's are stuffed into the bitstream whenever there
are too many successive 0's. If first 3(MAX-HEADING) bits are
0's then 1 is transmitted and after MAX_HEADING th 0. If
10(MAX_MIDDLE) or more 0's are sent successively a 1 is
inserted after the MAX_MIDDLE th 0. If the number of trailing
0's is larger than 2(MAX_TRAILING) then a 1 is appended.
The range associated with least probable symbol(LPS) is simply
computed as R*pLPS.
where R ->16 most significant bits of Range register
pLPS -> probability of LPS symbol.
If R value is less than QUATER (i.e 0x40000000)then
re-normalisation is performed. In this procedure both lower
value L and range R is doubled till R is greater than QUATER.
The following structure is used :
struct arcodec {
UInt L; // lower bound
UInt R; // code range
UInt V; // current code value
UInt arpipe;
Int bits_to_follow; // follow bit count
Int first_bit; // flag to check first bit
Int nzeros; // counter to count consecutive zeros
Int nonzero;
Int nzerosf;
Int extrabits;
Int mh; // MAX_HEAD
Int mm; // MAX_MIDDLE
Int mt; // MAX_TRAIL
unsigned char *out; // output array
};typedef struct arcodec ArCoder;
This structure is common for both encoder and decoder.
Assumption : Input bits are stored in an unsigned character array and each
bit is stored in one location. Similarly each output bit is
written to an unsigned character location.
Prototype : void StartArCoder(ArCoder * coder, unsigned char *);
void arith_encoder_mpeg4(int bit,int C0, ArCcoder * coder);
void arith_encode_renormalise(ArCoder *coder);
void arith_bitplusfollow(int bit, ArCoder *coder);
void arith_StopArCoder(ArCoder *coder);
Calling sequence: Arithetic encoder is initialised by calling _StartArCoder
function. _arith_encoder_mpeg4 function is called for each
context with a decision bit.
Finally _arith_StopArcoder function is called. Following C
code explains the calling sequence.
main()
{
int i,j,C,D;
struct ArCoder coder;
:
:
:
_StartArCoder(coder,&bit_output[0]) ;
// bit_output is address of output array
for(i=0;imm to MAX_HEADER value i.e 3
[P0+44] = R2; // to keep track of maximum zeros in middle of bit
// stream
R2 = MAXTRAILING_ER;
[P0+48] = R2; // store coder->mt as MAX_TRAILING value i.e 2
[P0+52] = R1; // address of output array
RTS;
NOP;
__StartArCoder.end:
/******************************************************************************
Prototype : void arith_encoder_mpeg4(int bit,int C0, ArCcoder * coder);
bit -> decision bit which is to be coded
C0 -> probability of symbol '0'.
coder -> address of structure variable ArCcoder.
In this procedure, probability of symbol '1' is calculated using probability of
symbol '0' If probability of symbol '1' is greater then probability of symbol
'0' then '0' is treated as LPS else '1' is treated as LPS. Range of LPS
symbol(rLPS) is calculated by multiplying higher 16 bit of range register(R) and
probability of LPS (CLPS). If bit to be coded equals to LPS then value of lower
register coder->L += coder->R-rLPS and new range value(coder->R) equals to range
of LPS(rLPS). Otherwise range is decremented by rLPS. If R value is less than
QUATER (i.e 0x40000000)then re-normalisation is performed. In this procedure
both lower value L and range R is doubled till R is greater than QUATER.
Registers used : R0-R3, R5-R7, P0, P1.
*******************************************************************************/
.section program;
.global __arith_encoder_mpeg4;
.align 8;
__arith_encoder_mpeg4:
P0 = R2; // Address of structure coder
[--SP] = (R7:5); // Push R7:5
P1 = R2; // Duplicate the address of coder
R2 = R0-R0(NS)||R6 = [P0++];
// fetch value of lower range register
BITSET(R2,16); // set r2 = 0x8000
R2 = R2-R1(NS)||R7 = [P0];
// R1 = probability of '0'.R2 = probability of'1'
// and get Range value
CC = R2>16; // Higher 16 bits of range register
R3 = CC; // R3 contains Least probable symbol
R5 = R5.L*R1.L(FU); // Range LPS(rLPS) = R * CLPS
R7 = R7-R5; // Range = Range -range of LPS
R2 = R6+R7; // Lower value += Range -range of LPS
CC = R0 == R3; // check if bit to be coded == LPS
IF CC R7 = R5; // if true Range == range of LPS
IF !CC R2 = R6; // if false lower range == previous lower range
[P0--] = R7; // store new Range value in R register
[P0] = R2; // store L register
[--SP] = RETS; // Push RETS register before calling a function
R0 = P1; // argument to function
CALL __arith_encode_renormalise;
RETS = [SP++]; // Pop RETS register
(R7:5) = [SP++]; // Pop R7:5
RTS;
NOP;
__arith_encoder_mpeg4.end:
/******************************************************************************
Prototype : void arith_encode_renormalise(ArCoder *coder);
Renormalization is done only if Range value (coder->R) is less than 0x40000000.
If lower value(coder->L) is greater than or equals to HALF(0x80000000) then '1'
is written to bitstream and lower value is decremented by HALF(0x80000000). If
sum of lower value and range value (coder->L +coder->R) is greater than HALF
then '0' is written to bitstream, otherwise lower value (coder->L )is
decremented by 0x40000000. Whenever renormalization is done, value of both
coder->L and coder-R is doubled. This procedure is repeated till range value
(coder->R)is greater than 0x40000000.
Registers used : R0-R3, R6, R7, P0, P5.
*******************************************************************************/
.section program;
.global __arith_encode_renormalise;
.align 8;
__arith_encode_renormalise:
P0 = R0; // Address of coder
[--SP] = (R7:6,P5:5); // Push R7:6,P5
P5 = R0; // Duplicate the address of coder
R7 = R1-R1(NS)||R0 = [P0++];
// fetch coder->L
R6 = R0-R0(NS)||R1 = [P0--];
// fetch coder->R
BITSET(R7,31); // set r7 == 0x80000000
BITSET(R6,30); // set r6 == 0x40000000
CHK_WHILE:
CC = R1R <0x40000000
IF !CC JUMP NORME_END; // if false jump to NORME_END
CC = R7L >0x80000000
IF !CC JUMP CHK_ELSE_IF;
R0 = R0-R7; // coder->L -= 0x80000000
[P0] = R0; // store coder->L value
R0 = 1; // arguments to arith_bitplusfollow function
R1 = P5;
[--SP] = RETS;
CALL __arith_bitplusfollow;
RETS = [SP++];
P0 = P5;
JUMP END_WHILE;
CHK_ELSE_IF: R2 = R0+R1;
CC = R2 <= R7(IU); // check if coder->L+coder->R <= 0x80000000
IF !CC JUMP CHK_ELSE; // if true call arith_bitplusfollow with bit == 0
R0 = 0;
R1 = P5;
[--SP] = RETS;
CALL __arith_bitplusfollow;
RETS = [SP++];
P0 = P5;
JUMP END_WHILE;
CHK_ELSE:
R2 = 1; // else increment bits_to_follow
R0 = R0-R6(NS)||R1 = [P0+16];
// decrement coder->L by 0x40000000
R1 = R1+R2(NS)||[P0] = R0;
// store modified coder->L value
[P0+16] = R1; // store incremented bits_to_follow value
END_WHILE:
R0 = [P0++];
R0 = R0<<1||R1 = [P0--];// double coder->L
R1 = R1<<1||[P0++] = R0;// double coder->R
[P0--] = R1; // store updated values
JUMP CHK_WHILE; // repeat the procedure till coder->R < 0x40000000
NOP; // to remove stalls on the silicon
NORME_END:
(R7:6,P5:5) = [SP++];
RTS;
NOP;
__arith_encode_renormalise.end:
/*******************************************************************************
Prototype : void arith_bitplusfollow(int bit, ArCoder *coder);
This function writes a bit to bit-stream. In order to avoid start code emulation
1's are stuffed whenever there are too many successive '0's. If the first
3(MAX_HEADING) bits are '0' then '1' is stuffed. If 10(MAX_MIDDLE) or more
zeros are sent then '1' is inserted after MAX_MIDDLE th '0'.
Registers used : R0-R3, R7, P0, P1.
******************************************************************************/
.section program;
.global __arith_bitplusfollow;
.align 8;
__arith_bitplusfollow:
P0 = R1; // Address of coder
[--SP] = R7; // Push R7 register
R1 = 1;
R7 = R0 <<0 || P1 = [P0+52];
// fetch address of output buffer
R2 = R1-R1(NS) || R3 = [P0+20];
// fetch value of first_bit flag.
CC = R3 == 0; // check if first_bit flag == 0
IF !CC JUMP CLR_FIRST_BIT;
// if false jump to CLR_FIRST_BIT
B[P1++] = R0; // store bit value to output buffer
CC = R0 == 0; // check if bit value == 0
IF !CC JUMP CHK_ELSE_COND;
R2 = [P0+24]; // get value of nzeros
R2 += -1; // decrement the value of nzeros
CC = R2 == 0; // if nzeros == 0, stuff '1'.
IF CC JUMP CHK_ELSE_1;
[P0+24] = R2;
JUMP CHK_WHILE_LOOP;
CHK_ELSE_1:
B[P1++] = R1;
// stuff '1' to output buffer
CHK_ELSE_COND:
[P0+28] = R1;
R3 = [P0+44]; // set coder->nzeros to MAX_MIDDLE(i.e 10)
[P0+24] = R3;
JUMP CHK_WHILE_LOOP;
CLR_FIRST_BIT:
[P0+20] = R2;
// clear first bit
// if coder->bits-to-follow is non zero value then !bit value is added to
// bitstream and coder->bits-to-follow is decremented. this process is continued
// till coder->bits-to-follow becomes to zero.
CHK_WHILE_LOOP:
BITTGL(R7,0);
R2 = [P0+16]; // get value of coder->bits_to_follow
TEST_BACK:
CC = R2 <= 0;
// check if coder->bits_to_follow <= 0
IF CC JUMP END_WHILE_LOOP1(bp);
R2 = R2-R1(ns)||B[P1++] = R7;
// if false !bit is stored as output
CC = R7 == 0; // stored bit is checked for zero
IF !CC JUMP CHK_ELSE2; // if true decrement the nzero counter
R0 = [P0+24];
R0 += -1;
[P0+24] = R0;
CC = R0 == 0;
IF !CC JUMP TEST_BACK;
B[P1++] = R1;
CHK_ELSE2:
[P0+28] = R1;
R3 = [P0+44];
[P0+24] = R3; // set coder->nzeros to MAX_MIDDLE (i.e 10)
JUMP TEST_BACK;
END_WHILE_LOOP1:
[P0+16] = R2;
END_WHILE_LOOP:
[P0+52] = P1;
// store the present pointer of output address
R7 = [SP++]; // Pop R7 register
RTS;
NOP;
__arith_bitplusfollow.end:
/*******************************************************************************
Prototype : void arith_StopArCoder(ArCoder *coder);
In this procedure additional bits are copied to output bit stream which is
required for proper decoding. If 2(MAX_TRAIL) or more zeroes are sent then '1'
is inserted after MAX_TRAIL th '0'.
Registers used : R0-R3, R5-R7, P0, P5.
*******************************************************************************/
.section program;
.global __arith_StopArCoder;
.align 8;
__arith_StopArCoder:
P0 = R0;
// Address of coder
[--SP] = (R7:5,P5:5); // Push R7:5,P5 and RETS register
[--SP] = RETS;
P5 = R0; // store the address in P5
R0 = [P0]; // get value of coder->L
R3 = 8;
R7 = R0>>29||R2 = [P0+4];
// fetch value of coder->R
R6 = R0+R2;
R6 = R6>>29;
CC = R6 == 0;
IF CC R6 = R3;
R3 = R6-R7;
CC = R3 <= 3;
IF !CC JUMP BIT_EQ_2;
CC = BITTST(R7,0); // conditional check to find how many
// bits are left
//r1 = cycles;
R0 = CC;
CC = R3 == 3;
R3 = CC;
R6 = 3;
//r2 = cycles;
R3 = R3&R0;
CC = R3 == 1;
IF CC JUMP BIT_EQ_2;
R7 += 1;
JUMP FOR_LOOP;
BIT_EQ_2:
R7 >>= 1;
R7 += 1;
R6 = 2;
FOR_LOOP:
R5 = -R6;
R5 += 1;
JUMP_BACK:
CC = R6 == 0;
IF CC JUMP END_FORLOOP;
R0 = LSHIFT R7 BY R5.L;
R2 = 1;
R0 = R0&R2;
R1 = P5; // call arith_bitplusfollow to write bits to
//bit-stream
CALL __arith_bitplusfollow;
R6 += -1; // decrement the counter
R5 += 1;
JUMP JUMP_BACK;
END_FORLOOP:
R0 = [P5+28];
// get value of coder->nzeros
CC = R0 == 0;
IF CC JUMP CALL_BITPLUS;
R1 = [P5+44];
R2 = [P5+48];
R1 = R1-R2(NS)||R0 = [P5+24];
CC = R0