/* Source listing: www.pudn.com > bf533_decoder_mpeg4.rar > arith_decoder_mpeg4.asm */
/*******************************************************************************
Copyright(c) 2000 - 2002 Analog Devices. All Rights Reserved.
Developed by Joint Development Software Application Team, IPDC, Bangalore, India
for Blackfin DSPs ( Micro Signal Architecture 1.0 specification).
By using this module you agree to the terms of the Analog Devices License
Agreement for DSP Software.
********************************************************************************
Module Name : arith_decoder_mpeg4.asm
Label Name : __arith_decoder_mpeg4
Version : 2.0
Change History :
Version Date Author Comments
2.0 01/09/2007 Arjun Tested with VDSP++4.5
Compiler 7.2.3.2
1.3 11/18/2002 Swarnalatha Tested with VDSP++ 3.0
compiler 6.2.2 on
ADSP-21535 Rev.0.2
1.2 11/13/2002 Swarnalatha Tested with VDSP++ 3.0
on ADSP-21535 Rev.0.2
1.1 03/20/2002 Raghavendra Modified to match
silicon cycle count
1.0 09/11/2001 Raghavendra Original
Description : Arithmetic decoding contains four main steps
1. Removal of stuffed bits.
2. Initialization which is performed prior to the decoding of
the first symbol.
3. Decoding of symbol themselves. The decoding of each symbol
may be followed by a re-normalization step.
4. Termination which is performed after the decoding of the
last symbol.
The least probable symbol LPS is defined as the symbol with
least probability. If both probabilities are equal to half
(i.e 0x8000), then '0' symbol is considered as least probable.
In initialization lower bound L is set to zero and range
register R is set to 0x7fffffff. Encoder will do bit stuffing
depending on the following condition in order to avoid start
code emulation.
1's are stuffed into the bitstream whenever there are too many
successive 0's. If first 3(MAX-HEADING) bits are 0's then 1 is
transmitted and after MAX_HEADING th 0. If 10(MAX_MIDDLE) or
more 0's are sent successively a 1 is inserted after the
MAX_MIDDLE th 0. If the number of trailing 0's is larger than
2(MAX_TRAILING) then a 1 is appended. These stuffed bits are
removed properly while decoding.
The range associated with least probable symbol(LPS) is simply
computed as R*pLPS.
where R ->16 most significant bits of Range register value
pLPS -> probability of LPS symbol.
If R value is less than QUATER 1/4(i.e 0x40000000)then
re-normalization is performed. In this procedure both lower
value L and range R is doubled till R is greater than QUATER.
The following structure is used :
struct arcodec {
UInt L; -> 32bit fixed point register. Contains
the lower bound of the interval
UInt R; -> 32bit fixed point register. Contains
the range of the interval
UInt V; -> Contains the value of arithmetic code
value. It is always larger than or
equal to L and less than R value.
UInt arpipe;
Int bits_to_follow; -> follow bit count
Int first_bit; -> flag to check first bit
Int nzeros; -> counter to count consecutive zeros
Int nonzero;
Int nzerosf;
Int extrabits;
Int mh; -> to hold MAX_HEAD
Int mm; -> to hold MAX_MIDDLE
Int mt; -> to hold MAX_TRAIL
unsigned char *in; -> address of input compressed data array
}; typedef struct arcodec ArCoder;
UInt -> unsigned integer
Int -> integer
Assumption : Both input and output arrays are unsigned character array.
Each bit is stored in one location.
Prototype : void StartArDecoder(ArDecoder *decoder,unsigned char *in);
void arith_decoder_mpeg4(int co,ArDecoder *decoder);
void decode_renormlise(Arcoder *decoder);
void AddNextInputBit(ArDecoder *decoder);
void StopArDecoder( ArDecoder *decoder);
Calling sequence: Decoder is initialised by calling StartArDecoder.
arith_decoder_mpeg4 function is called for each context
which returns one bit as output. Finally _StopArcoder
function is called.
Following C code explains the calling sequence.
main()
{
int i,j,k,C;
struct ArCoder coder;
.
.
.
_StartArDecoder(coder,&bit_input[0]) ;
// bit_input is address of input array
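for(i=0;i<...;i++)
.
.   (the remainder of this calling example and the beginning of the
.    StartArDecoder description were lost when this listing was extracted)
.
In StartArDecoder the lower bound L (decoder->L) is set to zero and
the range register R (decoder->R) is set to 0x7fffffff. The first 31 bits are read into
the decoder->V register.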
Registers used : R0-R7, P0-P2, P5, LC0.
*******************************************************************************/
#define MAXHEADING_ER 3
#define MAXMIDDLE_ER 10
#define MAXTRAILING_ER 2
.section L1_code;
.global __StartArDecoder;
.align 8;
/*******************************************************************************
__StartArDecoder
C prototype : void StartArDecoder(ArDecoder *decoder, unsigned char *in);
In          : R0 = address of the decoder structure
              R1 = address of the input array (one bit per byte)
Out         : nothing returned; the decoder structure is initialised.
Structure offsets used (struct arcodec from the file header, 4-byte fields):
   0 L          4 R          8 V         12 arpipe     16 bits_to_follow
  24 nzeros    28 nonzero   32 nzerosf   36 extrabits
  40 mh        44 mm        48 mt        52 in
Behaviour   : stores (in - 1) in decoder->in, then reads 31 bits into V.
              Bit i (i = 1..31) is fetched from (in - 1)[i + extrabits], the
              same addressing __AddNextInputBit uses afterwards.
              A '0' bit decrements nzerosf; when it reaches zero one stuffed
              bit is skipped (extrabits++) and nzerosf is re-armed to
              MAXMIDDLE_ER. A '1' bit re-arms nzerosf to MAXMIDDLE_ER.
Saved/restored : R7:4, P5.   Modified : R0-R3, P0-P2, LC0 (and loop regs), CC.

Fixes relative to the previous revision of this loop:
  1. When nzerosf reached zero it was stored as 0 instead of MAXMIDDLE_ER
     (the predicate was "!(nzerosf==0) & bit"; it must be
     "(nzerosf==0) | bit").
  2. The read offset P1 was advanced by the *cumulative* extrabits on every
     iteration, so after the first stuffed bit every later iteration skipped
     additional input bytes. It now advances by this iteration's increment.
*******************************************************************************/
__StartArDecoder:
    P0 = R0;                          // P0 = decoder
    [--SP] = (R7:4,P5:5);             // save R7:4 and P5
    P1 = 31;                          // loop count: 31 bits to preload
    R0 = MAXHEADING_ER;
    R5 = MAXMIDDLE_ER;                // R5 = default "re-arm" value for nzerosf
    R2 = 1;                           // constant 1
    R1 = R1-R2(NS)||[P0+44] = R5;     // R1 = in - 1 ; decoder->mm = MAXMIDDLE_ER
    [P0+52] = R1;                     // decoder->in = in - 1
    R3 = R1-R1(NS)||[P0+32] = R0;     // R3 = 0 ; decoder->nzerosf = MAXHEADING_ER
    [P0+40] = R0;                     // decoder->mh = MAXHEADING_ER
    R0 = MAXTRAILING_ER;
    R6 = R1-R1(NS)||[P0+48] = R0;     // R6 = 0 ; decoder->mt = MAXTRAILING_ER
    P5 = [P0+52];                     // P5 = in - 1 (base for bit fetches)
    R7 = R1-R1(NS)||[P0+36] = R3;     // R7 = 0 (V accumulator) ; extrabits = 0
    LSETUP(LOAD_31BITS_ST,LOAD_31BITS_END)LC0 = P1;
    P1 = 1;                           // P1 = i + extrabits, starts at 1
LOAD_31BITS_ST:
    P2 = P1+P5;                       // address of the bit to fetch
    R7 = R7<<1||R0 = B[P2](Z);        // V <<= 1 ; R0 = fetched bit
    R7 = R7+R0(NS)||R4 = [P0+32];     // V += bit ; R4 = nzerosf
    CC = R0 == 0;                     // bit == 0 ?
    IF CC R6 = R2;                    // R6 = 1 for a zero bit (R6 is 0 otherwise)
    R4 = R4-R6(NS)||R1 = [P0+36];     // nzerosf -= (bit == 0) ; R1 = extrabits
    CC = R4 == 0;                     // zero run exhausted ?
    R6 = CC;                          // R6 = 1 when a stuffed bit follows
    IF CC R3 = R2;                    // R3 = this iteration's extrabits increment
    R1 = R1+R3;                       // extrabits += R3
    R6 = R6|R0;                       // re-arm when run exhausted OR bit == 1
    CC = R6 == 0;
    IF CC R5 = R4;                    // otherwise keep the decremented count
    P2 = R3;                          // advance by this iteration's increment only
    P1 += 1;                          // next bit index
    R6 = R1-R1(NS)||[P0+36] = R1;     // R6 = 0 ; store extrabits
    R3 = R2-R2(NS)||[P0+32] = R5;     // R3 = 0 ; store nzerosf
    R5 = MAXMIDDLE_ER;                // restore default re-arm value
LOAD_31BITS_END:
    P1 = P1+P2;                       // skip the stuffed bit, if any
    [P0+8] = R7;                      // decoder->V = first 31 bits
    [P0] = R6;                        // decoder->L = 0
    [P0+16] = R6;                     // decoder->bits_to_follow = 0
    [P0+12] = R7;                     // decoder->arpipe = decoder->V
    [P0+28] = R6;                     // decoder->nonzero = 0
    R0 = [P0+40];                     // R0 = decoder->mh
    BITSET(R3,31);                    // R3 = 0x80000000 (R3 is 0 after the loop)
    R3 += -1;                         // R3 = 0x7fffffff
    [P0+24] = R0;                     // decoder->nzeros = decoder->mh
    [P0+4] = R3;                      // decoder->R = 0x7fffffff
    (R7:4,P5:5) = [SP++];             // restore R7:4 and P5
    RTS;
    NOP;
__StartArDecoder.end:
/******************************************************************************
Prototype : void arith_decoder_mpeg4(int co,ArDecoder *decoder);
co-> probability of '0' symbol
In this procedure, probability of symbol '1' is calculated using probability of
symbol '0'. If probability of symbol '1' is greater than probability of symbol
'0' then '0' is the least probable symbol(LPS), else '1' is LPS. Range of LPS
symbol(rLPS) is calculated by multiplying higher 16 bit of range register(R) and
probability of LPS (cLPS). The interval (L,L+R) is split into two
intervals (L,L+R-rLPS) and (L+R-rLPS,L+R). If decoder->V is in the latter interval then
the decoded symbol equals the LPS. Otherwise the decoded symbol is the opposite of the LPS.
The interval (L,L+R) is then reduced to the sub-interval in which decoder->V lies.
After the new interval has been computed, the new range R might be smaller than
0x40000000(QUATER).If so renormalization is carried out.
Registers used : R0-R3, R5-R7, P0-P2.
*******************************************************************************/
.section program;
.global __arith_decoder_mpeg4;
.align 8;
/*******************************************************************************
__arith_decoder_mpeg4
C prototype : void arith_decoder_mpeg4(int co, ArDecoder *decoder);
In          : R0 = co, probability of symbol '0' (scaled so that
                   P('0') + P('1') = 0x10000)
              R1 = address of the decoder structure
Out         : R0 = decoded bit (copied from R7 just before returning).
              decoder->L and decoder->R are updated, then
              __decode_renormlise is called on the decoder.
Saved/restored : R7:5, RETS.   Modified : R0-R3, P0 (plus whatever
              __decode_renormlise modifies).

NOTE(review): in the previous revision of this listing the line after
"R5 = R7>>16" was extraction residue ("CC = R2V register") and the
instructions that select the LPS, compute rLPS and load decoder->V were
missing. The section marked RECONSTRUCTED below was rebuilt from the
algorithm description in this file and from the register contents the
following (intact) instructions require:
   R0 = LPS symbol, R3 = LPS symbol, R5 = rLPS, R7 = R - rLPS,
   R2 = L + R - rLPS, R1 = decoder->V.
It is functionally derived, not recovered; the original instruction
selection and scheduling are unknown. Verify against a reference bitstream.
*******************************************************************************/
__arith_decoder_mpeg4:
    P0 = R1;                          // P0 = decoder
    [--SP] = (R7:5);                  // save R7:5
    R2 = R0-R0(NS)||R6 = [P0];        // R2 = 0 ; R6 = decoder->L
    BITSET(R2,16);                    // R2 = 0x10000
    R2 = R2-R0(NS)||R7 = [P0+4];      // R2 = P('1') = 0x10000 - co ; R7 = decoder->R
    R5 = R7>>16;                      // upper 16 bits of the range register
    // ---- RECONSTRUCTED (see NOTE above) ----
    CC = R2 < R0;                     // P('1') < P('0') : '1' is the LPS
                                      // (equal probabilities leave '0' as LPS)
    R3 = CC;                          // R3 = LPS symbol (default decoded bit)
    IF CC R0 = R2;                    // R0 = cLPS = probability of the LPS
    R5 *= R0;                         // R5 = rLPS = (R >> 16) * cLPS
    R0 = R3;                          // R0 = LPS symbol
    R7 = R7-R5;                       // R7 = R - rLPS
    R2 = R6+R7;                       // R2 = L + R - rLPS
    R1 = [P0+8];                      // R1 = decoder->V
    // ---- end of reconstructed section ----
    R1 = R1-R6(NS);                   // decoder->V - decoder->L
    BITTGL(R0,0);                     // R0 = opposite of LPS
    CC = R7 <= R1(IU);                // (R - rLPS) <= (V - L) ?
    IF CC R7 = R5;                    // yes: LPS decoded, new R = rLPS
    IF !CC R3 = R0;                   // no : decoded bit = opposite of LPS
    IF !CC R2 = R6;                   // no : L unchanged
    [P0+4] = R7;                      // store decoder->R
    R7 = R3<<0||[P0] = R2;            // R7 = decoded bit ; store decoder->L
    [--SP] = RETS;                    // save RETS across the call
    R0 = P0;                          // argument: decoder
    CALL __decode_renormlise;
    RETS = [SP++];                    // restore RETS
    R0 = R7;                          // return the decoded bit
    (R7:5) = [SP++];                  // restore R7:5
    RTS;
    NOP;
__arith_decoder_mpeg4.end:
/*******************************************************************************
Prototype : void decode_renormlise(Arcoder *decoder);
As long as decoder->R is smaller than QUATER(0x40000000), renormalization is
performed.
If the interval (L,L+R) is within [0,HALF], the interval is scaled to [2L,2(L+R)]
and V is scaled to 2V. If the interval (L,L+R) is within [HALF,1], the interval is
scaled to [2(L-HALF),2(L-HALF+R)] and V is scaled to 2(V-HALF). Otherwise the
interval is scaled to [2(L-QUATER),2(L-QUATER+R)] and V is scaled to 2(V-QUATER).
After each scaling, a bit is read and copied into the least significant bit of
register decoder->V.
Registers used : R0-R3, R6, R7, P0-P2, P5.
*******************************************************************************/
.section program;
.global __decode_renormlise;
.align 8;
/*******************************************************************************
__decode_renormlise
C prototype : void decode_renormlise(Arcoder *decoder);
In          : R0 = address of the decoder structure
Out         : nothing returned. While decoder->R < 0x40000000 the routine
              rescales decoder->L / decoder->V, doubles decoder->L and
              decoder->R, and calls __AddNextInputBit (which shifts the next
              input bit into decoder->V).
Saved/restored : R7:6, P5, RETS.   Modified : R0-R2, P0, CC (plus whatever
              __AddNextInputBit modifies).
Relies on   : __AddNextInputBit leaving P0 equal to the decoder address and
              not modifying R6/R7 (true for the version in this file).

Fix relative to the previous revision: the loop test at CHK_WHILE was
extraction residue ("CC = R1R <0x40000000") and did not assemble. It is
restored as an unsigned compare of decoder->R (R1) against 0x40000000 (R6).
*******************************************************************************/
__decode_renormlise:
    P0 = R0;                          // P0 = decoder
    [--SP] = (R7:6);                  // save R7:6
    [--SP] = P5;                      // save P5
    [--SP] = RETS;                    // save RETS (this routine makes calls)
    P5 = R0;                          // P5 = decoder, preserved across the calls
    R7 = R1-R1(NS)||R0 = [P0];        // R7 = 0 ; R0 = decoder->L
    R6 = R0-R0(NS)||R1 = [P0+4];      // R6 = 0 ; R1 = decoder->R
    BITSET(R7,31);                    // R7 = 0x80000000 (HALF)
    BITSET(R6,30);                    // R6 = 0x40000000 (QUATER)
CHK_WHILE:
    CC = R1 < R6(IU);                 // decoder->R < 0x40000000 ?
    IF !CC JUMP NORME_END;            // no: renormalisation finished
    CC = R7 <= R0(IU);                // decoder->L >= 0x80000000 ?
    IF !CC JUMP CHK_ELSE_IF;
    R0 = R0-R7(NS)||R1 = [P0+8];      // L -= HALF ; R1 = decoder->V
    R1 = R1-R7(NS)||[P0] = R0;        // V -= HALF ; store decoder->L
    [P0+8] = R1;                      // store decoder->V
    JUMP END_WHILE;
CHK_ELSE_IF:
    R2 = R0+R1;                       // L + R
    CC = R2 <= R7(IU);                // L + R <= 0x80000000 ?
    IF CC JUMP END_WHILE;             // yes: no offset to remove
    R0 = R0-R6(NS)||R1 = [P0+8];      // L -= QUATER ; R1 = decoder->V
    R1 = R1-R6(NS)||[P0] = R0;        // V -= QUATER ; store decoder->L
    [P0+8] = R1;                      // store decoder->V
END_WHILE:
    R0 = [P0];                        // reload decoder->L
    R0 = R0<<1||R1 = [P0+4];          // L *= 2 ; R1 = decoder->R
    R1 = R1<<1||[P0] = R0;            // R *= 2 ; store decoder->L
    [P0+4] = R1;                      // store decoder->R
    R0 = P5;                          // argument: decoder
    CALL __AddNextInputBit;           // shift the next input bit into decoder->V
    R0 = [P5];                        // R0 = decoder->L
    R1 = [P5+4];                      // R1 = decoder->R
    JUMP CHK_WHILE;                   // repeat while decoder->R < 0x40000000
NORME_END:
    RETS = [SP++];                    // restore RETS
    P5 = [SP++];                      // restore P5
    (R7:6) = [SP++];                  // restore R7:6
    RTS;
    NOP;
__decode_renormlise.end:
/******************************************************************************
Prototype : void AddNextInputBit(ArDecoder *decoder);
In this procedure any stuffed bits are removed. One bit is read from input array
and copied to LSB of V register.
Registers used : R0-R3, P0-P2.
*******************************************************************************/
.section program;
.global __AddNextInputBit;
.align 8;
/*******************************************************************************
__AddNextInputBit
C prototype : void AddNextInputBit(ArDecoder *decoder);
In          : R0 = address of the decoder structure
Out         : nothing returned. One bit is shifted into the LSB of
              decoder->V and decoder->arpipe; the stuffing counters
              (nzeros, nonzero, nzerosf, extrabits) and the input pointer
              decoder->in are updated.
Structure offsets used (struct arcodec from the file header, 4-byte fields):
   8 V   12 arpipe   24 nzeros   28 nonzero   32 nzerosf   36 extrabits
  44 mm  52 in
Modified    : R0-R3, P0-P2, CC. No stack usage. On exit P0 still holds the
              decoder address.
*******************************************************************************/
__AddNextInputBit:
P0 = R0; // P0 = address of the decoder structure
R1 = 1; // constant 1 (written to decoder->nonzero, subtracted from extrabits)
R3 = [P0+12]; // R3 = decoder->arpipe
R2 = R3>>30||P1 = [P0+52];
// R2 bit 0 = bit 30 of arpipe ; P1 = decoder->in (input pointer)
CC = BITTST(R2,0); // is bit 30 of arpipe a '1'?
IF CC JUMP ELSE_CONDITION; // yes: just re-arm the zero counter
R2 = [P0+24];
R2 += -1;
[P0+24] = R2; // decoder->nzeros -= 1
CC = R2 == 0;
IF !CC JUMP COND_OVER; // zero run not yet exhausted
R0 = [P0+36]; // R0 = decoder->extrabits
R0 = R0-R1(NS)||R2 = B[P1++](Z);
// extrabits - 1 ; the byte load only serves to advance P1 by one
// (skips a stuffed bit; the value loaded into R2 is overwritten below)
[P0+36] = R0; // decoder->extrabits -= 1
ELSE_CONDITION:
R2 = [P0+44]; // R2 = decoder->mm
[P0+28] = R1; // decoder->nonzero = 1
[P0+24] = R2; // decoder->nzeros = decoder->mm
COND_OVER:
R0 = 31;
R3 = R3<<1||R1 = [P0+36];
// arpipe <<= 1 ; R1 = decoder->extrabits
R0 = R0+R1(NS)||R2 = B[P1++](Z);
// R0 = 31 + extrabits ; the byte load only advances P1 by one
// (consumes one input bit; the loaded value is overwritten below)
P2 = R0; // P2 = 31 + extrabits, look-ahead offset of the bit to fetch
R2 = [P0+8]; // R2 = decoder->V
R2 = R2<<1; // V <<= 1
R0 = 1;
P2 = P2+P1; // address of the look-ahead bit
R1 = B[P2](Z); // fetch the byte holding the bit
R1 = R1&R0; // keep only its LSB
R2 = R2+R1(NS)||R0 = [P0+32];
// V += bit ; R0 = decoder->nzerosf
R3 = R3+R1(NS)||[P0+8] = R2;
// arpipe += bit ; store decoder->V
CC = R1 == 0;
IF !CC JUMP NO_DECREMENT; // a '1' bit re-arms nzerosf
R0 += -1;
[P0+32] = R0; // decoder->nzerosf -= 1
CC = R0 == 0;
IF !CC JUMP OVER; // zero run not yet exhausted
R0 = [P0+36];
R0 += 1;
[P0+36] = R0; // decoder->extrabits += 1, then fall through to re-arm nzerosf
NO_DECREMENT:
R0 = [P0+44]; // R0 = decoder->mm
[P0+32] = R0; // decoder->nzerosf = decoder->mm
OVER:
[P0+12] = R3; // store decoder->arpipe
[P0+52] = P1; // store the updated input pointer in decoder->in
RTS;
NOP;
__AddNextInputBit.end:
/*******************************************************************************
Prototype : void StopArDecoder( ArDecoder *decoder);
After the last symbol has been decoded,additional bits need to be consumed which
are introduced by the encoder for proper decodability. In general 3 further bits
need to be read. However in some cases, only 2 bits need to be read. These cases
are defined by
i) if the current interval covers entirely[QUATER-1,HALF]
ii) if the current interval covers entirely [ HALF-1, 3*QUATER].
Registers used : R0-R3, R5-R7, P0-P2, P5.
*******************************************************************************/
.section program;
.global __StopArDecoder;
.align 8;
__StopArDecoder:
P0 = R0; // Address of decoder
[--SP] = (R7:5,P5:5); // Push R7:5,P5 and RETS register
P5 = R0; // store the address in P5
R0 = [P0]; // get value of coder->L
R3 = 8;
R7 = R0>>29||R2 = [P0+4];
// fetch value of coder->R
R6 = R0+R2;
R6 = R6>>29;
CC = R6 == 0;
IF CC R6 = R3;
R6 = R6-R7;
CC = R6 == 3;
R2 = CC;
CC = BITTST(R7,0); // conditional check to find how many bits to read
R1 = CC;
[--SP] = RETS;
R0 = P5;
R7 = R2&R1;
CALL __AddNextInputBit; // fetch next bits and copy to LSB of
// decoder->V reg.
CC = R6 <= 3;
IF !CC JUMP CALL_END;
CC = R7 == 1;
IF CC JUMP CALL_END;
R0 = P5;
CALL __AddNextInputBit; // fetch next bits and copy to LSB of
// decoder->V reg.
CALL_END:
R0 = [P5+28];
CC = R0 == 0; // check if nzeros == 0
IF CC JUMP FLUSH_BITPLUS;
// if true flush a bit
R1 = [P5+44];
R2 = [P5+48]; // get difference of decoder->mm-decoder->mt
R1 = R1-R2(NS)||R0 = [P5+24];
CC = R0