// Origin: www.pudn.com > bf533_decoder_mpeg4.rar > arith_decoder_mpeg4.asm


/******************************************************************************* 
Copyright(c) 2000 - 2002 Analog Devices. All Rights Reserved. 
Developed by Joint Development Software Application Team, IPDC, Bangalore, India 
for Blackfin DSPs  ( Micro Signal Architecture 1.0 specification). 
 
By using this module you agree to the terms of the Analog Devices License 
Agreement for DSP Software.  
******************************************************************************** 
Module Name     : arith_decoder_mpeg4.asm 
Label Name      : __arith_decoder_mpeg4 
Version         :   2.0 
Change History  :  
 
                Version     Date          Author        Comments 
                2.0	    01/09/2007    Arjun	        Tested with VDSP++4.5 
							Compiler 7.2.3.2 
		1.3         11/18/2002    Swarnalatha   Tested with VDSP++ 3.0 
                                                        compiler 6.2.2 on  
                                                        ADSP-21535 Rev.0.2 
                1.2         11/13/2002    Swarnalatha   Tested with VDSP++ 3.0 
                                                        on ADSP-21535 Rev.0.2                                      
                1.1         03/20/2002    Raghavendra   Modified to match 
                                                        silicon cycle count 
                1.0         09/11/2001    Raghavendra   Original  
 
Description     : Arithmetic decoding contains four main steps 
                  1. Removal of stuffed bits. 
                  2. Initialization which is performed prior to the decoding of  
                     the first symbol. 
                  3. Decoding of symbol themselves. The decoding of each symbol  
                     may be followed by a re-normalization step. 
                  4. Termination which is performed after the decoding of the  
                     last symbol. 
 
                  The least probable symbol LPS is defined as the symbol with  
                  least probability. If both probabilities are equal to half  
                  (i.e 0x8000), then '0' symbol is considered as least probable. 
                  In initialization lower bound L is set to zero and range  
                  register R is set to 0x7fffffff. Encoder will do bit stuffing  
                  depending on the following condition in order to avoid start  
                  code emulation. 
                  1's are stuffed into the bitstream whenever there are too many 
                  successive 0's. If the first 3 (MAX_HEADING) bits are 0's, a 1 
                  is transmitted after the MAX_HEADING-th 0. If 10 (MAX_MIDDLE) or 
                  more 0's are sent successively, a 1 is inserted after the 
                  MAX_MIDDLE-th 0. If the number of trailing 0's is larger than 
                  2 (MAX_TRAILING), then a 1 is appended. These stuffed bits are 
                  removed while decoding. 
                  The range associated with least probable symbol(LPS) is simply 
                  computed as R*pLPS.  
                    where R ->16 most significant bits of Range register value 
                    pLPS -> probability of LPS symbol. 
                  If the R value is less than QUARTER, i.e. 1/4 (0x40000000), then 
                  re-normalization is performed. In this procedure both the lower 
                  bound L and the range R are doubled until R is at least QUARTER. 
 
                  The following structure is used : 
                  struct arcodec { 
                  UInt L;              -> 32bit fixed point register. Contains  
                                          the lower bound of the interval 
                  UInt R;              -> 32bit fixed point register. Contains  
                                          the range of the interval 
                  UInt V;              -> Contains the value of arithmetic code  
                                          value. It is always larger than or  
                                          equal to L and less than R value. 
                  UInt arpipe;  
                  Int bits_to_follow;  -> follow bit count 
                  Int first_bit;       -> flag to check first bit 
                  Int nzeros;          -> counter to count consecutive zeros 
                  Int nonzero;   
                  Int nzerosf;         
                  Int extrabits; 
                  Int mh;              ->  to hold MAX_HEAD 
                  Int mm;              ->  to hold MAX_MIDDLE 
                  Int mt;              ->  to hold MAX_TRAIL 
                  unsigned char *in;   -> address of input compressed data array 
                  }; typedef struct arcodec ArCoder;   
 
                  UInt -> unsigned integer  
                  Int -> integer  
 
Assumption      : Both input and output arrays are unsigned character array. 
                  Each bit is stored in one location. 
   
Prototype       : void StartArDecoder(ArDecoder *decoder,unsigned char *in); 
                  void arith_decoder_mpeg4(int co,ArDecoder *decoder); 
                  void decode_renormlise(Arcoder *decoder); 
                  void  AddNextInputBit(ArDecoder *decoder); 
                  void StopArDecoder( ArDecoder *decoder); 
 
Calling sequence: Decoder is initialised by calling StartArDecoder.  
                  arith_decoder_mpeg4 function is called for each context 
                  which returns one bit as output. Finally _StopArcoder  
                  function is called. 
 
                   Following C code explains the calling sequence. 
                      
                  main() 
                   {  
                      int i,j,k,C; 
                      struct ArCoder coder; 
                            . 
                            . 
                            . 
                        _StartArDecoder(coder,&bit_input[0]) ; 
                            // bit_input is address of input array  
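                        for(i=0;i<...;i++) 
                            . 
                            .   [The remainder of this example and the beginning 
                            .    of the StartArDecoder description were lost when 
                            .    this listing was extracted; text resumes below.] 
 
StartArDecoder: the lower bound register L (decoder->L) is set to zero and 
the range register R (decoder->R) is set to 0x7fffffff. The first 31 bits are 
read into the decoder->V register. 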
 
Registers used : R0-R7, P0-P2, P5, LC0. 
*******************************************************************************/ 
#define MAXHEADING_ER 3 
#define MAXMIDDLE_ER 10 
#define MAXTRAILING_ER 2 
 
.section               L1_code; 
.global              __StartArDecoder; 
.align                         8; 
     
/*
 * __StartArDecoder -- initialise the arithmetic decoder state.
 *
 * In : R0 = address of the decoder structure
 *      R1 = address of the input array (one bit per byte)
 * Out: nothing in registers. Structure fields written (byte offsets, field
 *      names per the struct documented in the file header):
 *        +0  L = 0                  +4  R = 0x7fffffff
 *        +8  V = first 31 bits      +12 arpipe = V
 *        +16 bits_to_follow = 0     +24 nzeros = mh
 *        +28 nonzero = 0            +32 nzerosf (running zero counter)
 *        +36 extrabits (stuffed bits skipped while loading V)
 *        +40 mh = MAXHEADING_ER     +44 mm = MAXMIDDLE_ER
 *        +48 mt = MAXTRAILING_ER    +52 in = R1 - 1
 * Uses R0-R7, P0-P2, P5, LC0. R7:4 and P5 are saved and restored.
 *
 * Two corrections relative to the previous revision, both in the load loop
 * and both only visible when a stuffed bit occurs within the first 31 bits:
 *   1. The bit offset is advanced by 1 plus the extrabits increment of the
 *      current iteration. Previously the cumulative extrabits count was
 *      added on every iteration, so after the first stuffed bit every
 *      following iteration skipped an input bit.
 *   2. When the zero counter reaches 0 it is reloaded with MAXMIDDLE_ER.
 *      Previously 0 was stored, so a following run of zeros was not
 *      detected.
 */
__StartArDecoder:

    P0 = R0;                // P0 -> decoder structure
    [--SP] = (R7:4,P5:5);   // save R7:4 and P5
    P1 = 31;                // loop count: 31 bits are loaded into V
    R0 = MAXHEADING_ER;
    R5 = MAXMIDDLE_ER;      // R5 = reload value for the zero counter
    R2 = 1;                 // R2 = constant 1 for the whole routine
    R1 = R1-R2(NS)||[P0+44] = R5;
                            // R1 = in - 1; decoder->mm = MAXMIDDLE_ER
    [P0+52] = R1;           // decoder->in = in - 1
    R3 = R1-R1(NS)||[P0+32] = R0;
                            // R3 = 0; decoder->nzerosf = MAXHEADING_ER
    [P0+40] = R0;           // decoder->mh = MAXHEADING_ER
    R0 = MAXTRAILING_ER;
    R6 = R1-R1(NS)||[P0+48] = R0;
                            // R6 = 0; decoder->mt = MAXTRAILING_ER
    P5 = [P0+52];           // P5 = in - 1, base address for the bit fetches
    R7 = R1-R1(NS)||[P0+36] = R3;
                            // R7 (V accumulator) = 0; decoder->extrabits = 0

    LSETUP(LOAD_31BITS_ST,LOAD_31BITS_END)LC0 = P1;

    P1 = 1;                 // P1 = offset of the next bit relative to P5
LOAD_31BITS_ST:
        // Loop invariants on entry to each iteration:
        //   R2 = 1, R3 = 0, R6 = 0, R5 = MAXMIDDLE_ER, R7 = V so far.
        P2 = P1+P5;         // address of the bit to fetch
        R7 = R7<<1||R0 = B[P2](Z);
                            // V <<= 1; R0 = fetched bit
        R7 = R7+R0(NS)||R4 = [P0+32];
                            // V += bit; R4 = decoder->nzerosf
        CC = R0 == 0;       // bit == 0 ?
        IF CC R6 = R2;      // R6 = 1 for a zero bit, else stays 0
        R4 = R4-R6(NS)||R1 = [P0+36];
                            // count the zero bit; R1 = decoder->extrabits
        CC = R4 == 0;       // zero counter exhausted -> a stuffed bit follows
        R6 = CC;            // R6 = 1 when the counter hit zero
        IF CC R3 = R2;      // R3 = extrabits increment of this iteration
        R1 = R1+R3;         // updated cumulative extrabits
        R6 = R6|R0;         // counter hit zero OR bit was 1
        CC = R6 == 0;       // neither: keep the decremented counter,
        IF CC R5 = R4;      // otherwise R5 stays MAXMIDDLE_ER (reload)
        P2 = R3;            // skip distance for the stuffed bit (0 or 1)
        P1 += 1;            // step to the next bit
        R6 = R1-R1(NS)||[P0+36] = R1;
                            // R6 = 0; store decoder->extrabits
        R3 = R2-R2(NS)||[P0+32] = R5;
                            // R3 = 0; store decoder->nzerosf
        R5 = MAXMIDDLE_ER;  // restore the reload value for the next pass
LOAD_31BITS_END:
        P1 = P1+P2;         // additionally skip the stuffed bit, if any
    [P0+8] = R7;            // decoder->V = first 31 bits
    [P0] = R6;              // decoder->L = 0 (R6 was cleared in the loop)
    [P0+16] = R6;           // decoder->bits_to_follow = 0
    [P0+12] = R7;           // decoder->arpipe = decoder->V
    [P0+28] = R6;           // decoder->nonzero = 0
    R0 = [P0+40];           // R0 = decoder->mh
    BITSET(R3,31);          // R3 was cleared in the loop -> 0x80000000
    R3 += -1;               // R3 = 0x7fffffff
    [P0+24] = R0;           // decoder->nzeros = decoder->mh
    [P0+4] = R3;            // decoder->R = 0x7fffffff
    (R7:4,P5:5) = [SP++];   // restore R7:4 and P5
    RTS;
    NOP;
__StartArDecoder.end:     
/****************************************************************************** 
Prototype : void arith_decoder_mpeg4(int co,ArDecoder *decoder); 
    co-> probability of '0' symbol 
 
In this procedure, probability of symbol '1' is calculated using probability of  
symbol '0'. If probability of symbol '1' is greater than probability of symbol 
'0' then '0' is the least probable symbol(LPS), else '1' is LPS. Range of LPS  
symbol(rLPS) is calculated by multiplying higher 16 bit of range register(R) and 
probability of LPS (CLPS). The interval (L,L+R) is split into two 
intervals (L,L+R-rLPS) and (L+R-rLPS,L+R). If decoder->V is in the latter interval 
then the decoded symbol equals LPS. Otherwise the decoded symbol is the opposite of 
LPS. The interval (L,L+R) is then reduced to the sub-interval in which decoder->V lies.  
After the new interval has been computed, the new range R might be smaller than  
0x40000000(QUATER).If so renormalization is carried out. 
 
Registers used : R0-R3, R5-R7, P0-P2. 
*******************************************************************************/ 
.section               program; 
.global        __arith_decoder_mpeg4; 
.align                      8; 
     
/*
 * __arith_decoder_mpeg4 -- decode one binary symbol.
 *
 * In : R0 = co, probability of symbol '0' (scaled so that 0x10000 == 1.0)
 *      R1 = address of the decoder structure
 * Out: R0 = decoded bit (the prototype in the file header says void, but the
 *           bit is moved into R0 just before returning)
 *      decoder->L (+0) and decoder->R (+4) are narrowed to the selected
 *      sub-interval, then __decode_renormlise is called on the structure.
 * Uses R0-R3, R5-R7, P0 directly; R7:5 and RETS are saved and restored.
 *
 * NOTE(review): in the listing this file was recovered from, the instructions
 * between "R5 = R7>>16" and "R1 = R1-R6" had collapsed into a single
 * unassemblable line ("CC = R2V register"). The section marked RECONSTRUCTED
 * below was rebuilt from the register contract required by the surviving
 * code (R0 = LPS, R3 = LPS, R5 = rLPS, R7 = R - rLPS, R2 = L + R - rLPS,
 * R1 = V) and from the algorithm described in the comments of this file.
 * It should be compared against an intact copy of the original source.
 */
__arith_decoder_mpeg4:

    P0 = R1;                // P0 -> decoder structure
    [--SP] = (R7:5);        // save R7:5
    R2 = R0-R0(NS)||R6 = [P0];
                            // R2 = 0; R6 = decoder->L
    BITSET(R2,16);          // R2 = 0x10000 (probability 1.0)
    R2 = R2-R0(NS)||R7 = [P0+4];
                            // R2 = probability of '1' (R0 = probability of '0');
                            // R7 = decoder->R
    R5 = R7>>16;            // upper 16 bits of the range register

    // ---- RECONSTRUCTED (see NOTE above) -----------------------------------
    CC = R2 < R0;           // p('1') < p('0') -> '1' is the LPS; a tie makes
                            // '0' the LPS, as stated in the file header
    IF CC R0 = R2;          // R0 = probability of the LPS (cLPS)
    R3 = CC;                // R3 = LPS symbol (0 or 1)
    R5 *= R0;               // rLPS = (R >> 16) * cLPS
    R0 = R3;                // R0 = LPS (toggled below to get the MPS)
    R7 = R7-R5;             // R7 = R - rLPS (range of the MPS)
    R2 = R6+R7;             // R2 = L + R - rLPS (lower bound if LPS)
    R1 = [P0+8];            // R1 = decoder->V
    // ---- end of reconstructed section -------------------------------------

    R1 = R1-R6(NS);         // decoder->V - decoder->L
    BITTGL(R0,0);           // R0 = opposite of LPS
    CC = R7 <= R1(IU);      // (decoder->R - rLPS) <= (decoder->V - decoder->L) ?
    IF CC R7 = R5;          // LPS decoded: new range = rLPS
    IF !CC R3 = R0;         // otherwise the decoded symbol is the opposite of LPS
    IF !CC R2 = R6;         // ... and the lower bound is unchanged
    [P0+4] = R7;            // store decoder->R
    R7 = R3<<0||[P0] = R2;  // keep the decoded bit in R7 across the call;
                            // store decoder->L
    [--SP] = RETS;          // save RETS before calling a function
    R0 = P0;                // argument for __decode_renormlise
    CALL __decode_renormlise;
    RETS = [SP++];          // restore RETS
    R0 = R7;                // return the decoded bit
    (R7:5) = [SP++];        // restore R7:5
    RTS;   
    NOP;

__arith_decoder_mpeg4.end:     
/******************************************************************************* 
Prototype  : void decode_renormlise(Arcoder *decoder); 
 
As long as decoder->R is smaller than QUATER(0x40000000), renormalization is  
performed. 
If the interval(L,L+R) is within [0,HALF], the interval is scaled to [2L,2(L+R)] 
and V is scaled to 2V. If the interval(L,L+R) is within [HALF,1], the interval is 
scaled to [2(L-HALF),2(L-HALF+R)] and V is scaled to 2(V-HALF). Otherwise the 
interval is scaled to [2(L-QUARTER),2(L-QUARTER+R)] and V is scaled to 2(V-QUARTER). 
After each scaling, a bit is read and copied into the least significant bit of  
register decoder->V. 
 
Registers used : R0-R3, R6, R7, P0-P2, P5. 
*******************************************************************************/ 
.section                       program; 
.global            __decode_renormlise; 
.align                               8; 
/*
 * __decode_renormlise -- renormalise the decoder interval.
 *
 * In : R0 = address of the decoder structure
 * Out: nothing in registers. While decoder->R (+4) is below 0x40000000,
 *      decoder->L (+0) and decoder->V (+8) are re-based, L and R are doubled
 *      and __AddNextInputBit is called to shift one more bit into V.
 * Uses R0-R2, R6, R7, P0, P5 directly; R7:6, P5 and RETS are saved and
 * restored.
 *
 * NOTE(review): the loop-condition line had been damaged in the listing this
 * file was recovered from ("CC = R1R <0x40000000"). It is restored here as an
 * unsigned compare of R1 (decoder->R) against R6 (0x40000000), which is what
 * the surviving comment fragment and the register set-up imply. The original
 * may have used the signed form of the compare; the two agree for any R
 * below 0x80000000.
 */
__decode_renormlise:

    P0 = R0;                // P0 -> decoder structure
    [--SP] = (R7:6);        // save R7:6,
    [--SP] = P5;            // P5
    [--SP] = RETS;          // and RETS (this routine makes calls)
    P5 = R0;                // keep the structure address across the calls
    R7 = R1-R1(NS)||R0 = [P0];
                            // R7 = 0; R0 = coder->L
    R6 = R0-R0(NS)||R1 = [P0+4];
                            // R6 = 0; R1 = coder->R
    BITSET(R7,31);          // R7 = 0x80000000 (HALF)
    BITSET(R6,30);          // R6 = 0x40000000 (QUARTER)
CHK_WHILE:
    CC = R1 < R6(IU);       // coder->R < 0x40000000 ?
    IF !CC JUMP NORME_END;  // no: renormalisation finished
    CC = R7 <= R0(IU);      // coder->L >= 0x80000000 ?
    IF !CC JUMP CHK_ELSE_IF;
    R0 = R0-R7(NS)||R1 = [P0+8];
                            // coder->L -= HALF; R1 = decoder->V
    R1 = R1-R7(NS)||[P0] = R0;
                            // decoder->V -= HALF; store coder->L
    [P0+8] = R1;            // store decoder->V
    JUMP END_WHILE;
CHK_ELSE_IF:
    R2 = R0+R1;             // coder->L + coder->R
    CC = R2 <= R7(IU);      // interval entirely within the lower half?
    IF CC JUMP END_WHILE;   // yes: no re-basing needed
    R0 = R0-R6(NS)||R1 = [P0+8];
                            // coder->L -= QUARTER; R1 = decoder->V
    R1 = R1-R6(NS)||[P0] = R0;
                            // decoder->V -= QUARTER; store coder->L
    [P0+8] = R1;            // store decoder->V
END_WHILE:
    R0 = [P0];              // reload coder->L (possibly re-based above)
    R0 = R0<<1||R1 = [P0+4];// double coder->L; R1 = coder->R
    R1 = R1<<1||[P0] = R0;  // double coder->R; store coder->L
    [P0+4] = R1;            // store coder->R
    R0 = P5;                // argument for __AddNextInputBit

    CALL __AddNextInputBit; // shift V left and copy the next input bit
                            // into its LSB
    R0 = [P5];
    R1 = [P5+4];            // reload coder->L and coder->R for the next test
    JUMP CHK_WHILE;         // repeat while coder->R < 0x40000000
     
NORME_END:
    RETS = [SP++];
    P5 = [SP++];
    (R7:6) = [SP++];        // restore RETS, P5 and R7:6
    RTS;
    NOP;
__decode_renormlise.end:     
/****************************************************************************** 
Prototype : void  AddNextInputBit(ArDecoder *decoder); 
In this procedure any stuffed bits are removed. One bit is read from input array 
and copied to LSB of V register. 
 
Registers used : R0-R3, P0-P2. 
*******************************************************************************/ 
.section                 program; 
.global               __AddNextInputBit; 
.align                        8; 
     
__AddNextInputBit:
    // Consumes one bit of input: updates the zero-run bookkeeping for the
    // bit leaving the arpipe window, advances the input pointer (skipping a
    // stuffed bit where the counters say one is present) and shifts the
    // newly fetched bit into the LSB of decoder->V and decoder->arpipe.
    // In : R0 = address of the decoder structure.
    // Uses R0-R3, P0-P2; nothing is saved on the stack.
    // Field names below follow the struct layout in the file header
    // (+24 nzeros, +28 nonzero, +32 nzerosf, +36 extrabits, +44 mm, +52 in).

    P0 = R0;                // P0 -> decoder structure
    R1 = 1;                 // R1 = constant 1
    R3 = [P0+12];           // R3 = decoder->arpipe
    R2 = R3>>30||P1 = [P0+52];
                            // R2 bit 0 = arpipe bit 30; P1 = decoder->in
    CC = BITTST(R2,0);      // is that arpipe bit a 1?
    IF CC JUMP ELSE_CONDITION;
    R2 = [P0+24];
    R2 += -1;
    [P0+24] = R2;           // decoder->nzeros -= 1
    CC = R2 == 0;
    IF !CC JUMP COND_OVER;  // zero run not yet exhausted
    R0 = [P0+36];           // fetch decoder->extrabits
    R0 = R0-R1(NS)||R2 = B[P1++](Z);
                            // extrabits - 1; the byte load only serves to
                            // advance P1 past the stuffed bit (R2 is
                            // overwritten below)
    [P0+36] = R0;           // store decoder->extrabits
                            // falls through: counters are reset as for a 1 bit
ELSE_CONDITION:
    R2 = [P0+44];           // fetch decoder->mm
    [P0+28] = R1;           // decoder->nonzero = 1
    [P0+24] = R2;           // decoder->nzeros = decoder->mm
COND_OVER:
    R0 = 31;  
    R3 = R3<<1||R1 = [P0+36];   
                            // arpipe <<= 1; R1 = decoder->extrabits
    R0 = R0+R1(NS)||R2 = B[P1++](Z);
                            // R0 = 31 + extrabits; the load advances P1 by
                            // one bit (the loaded value is not used)
    P2 = R0;                // offset to fetch current bit 
    R2 = [P0+8];            // fetch decoder->V
    R2 = R2<<1;             // decoder->V <<= 1
    R0 = 1; 
    P2 = P2+P1;             // address = in + 31 + extrabits
    R1 = B[P2](Z);          // fetch a bit
    R1 = R1&R0;             // keep only bit 0 of the fetched byte
    R2 = R2+R1(NS)||R0 = [P0+32];
                            // add bit to LSB of decoder->V; R0 = nzerosf
    R3 = R3+R1(NS)||[P0+8] = R2;
                            // add bit to LSB of decoder->arpipe; store V
    CC = R1 == 0;              
    IF !CC JUMP NO_DECREMENT;
                            // a 1 bit resets the zero counter
    R0 += -1; 
    [P0+32] = R0;           // decrement decoder->nzerosf
    CC = R0 == 0; 
    IF !CC JUMP OVER; 
    R0 = [P0+36];           // counter exhausted: a stuffed bit follows,
    R0 += 1;                // so decoder->extrabits += 1
    [P0+36] = R0; 
NO_DECREMENT: 
    R0 = [P0+44];           // fetch decoder->mm
    [P0+32] = R0;           // decoder->nzerosf = decoder->mm
OVER: 
    [P0+12] = R3;           // store decoder->arpipe
    [P0+52] = P1;           // store the advanced input pointer
    RTS; 
    NOP; 
__AddNextInputBit.end:     
/******************************************************************************* 
Prototype : void StopArDecoder( ArDecoder *decoder); 
 
After the last symbol has been decoded,additional bits need to be consumed which 
are introduced by the encoder for proper decodability. In general 3 further bits 
need to be read. However in some cases, only 2 bits need to be read. These cases 
are defined by 
i)  if the current interval covers entirely[QUATER-1,HALF] 
ii) if the current interval covers entirely [ HALF-1, 3*QUATER]. 
 
Registers used : R0-R3, R5-R7, P0-P2, P5. 
*******************************************************************************/ 
.section             program; 
.global             __StopArDecoder; 
.align                      8; 
     
__StopArDecoder: 
    P0 = R0;                // Address of decoder 
    [--SP] = (R7:5,P5:5);   // Push R7:5,P5 and RETS register 
    P5 = R0;                // store the address in P5 
    R0 = [P0];              // get value of coder->L 
    R3 = 8;     
    R7 = R0>>29||R2 = [P0+4]; 
                            // fetch value of coder->R  
    R6 = R0+R2;             
    R6 = R6>>29;              
    CC = R6 == 0; 
    IF CC R6 = R3; 
    R6 = R6-R7; 
    CC = R6 == 3; 
    R2 = CC; 
    CC = BITTST(R7,0);      // conditional check to find how many bits to read 
    R1 = CC; 
    [--SP] = RETS;        
    R0 = P5; 
    R7 = R2&R1; 
     
    CALL __AddNextInputBit; // fetch next bits and copy to LSB of  
                            // decoder->V reg. 
    CC = R6 <= 3; 
    IF !CC JUMP CALL_END; 
    CC = R7 == 1; 
    IF CC JUMP CALL_END; 
    R0 = P5; 
    CALL __AddNextInputBit; // fetch next bits and copy to LSB of  
                            // decoder->V reg. 
     
CALL_END: 
    R0 = [P5+28];             
    CC = R0 == 0;           // check if nzeros == 0 
    IF CC JUMP FLUSH_BITPLUS; 
                            // if true flush a bit  
    R1 = [P5+44]; 
    R2 = [P5+48];           // get difference of decoder->mm-decoder->mt 
    R1 = R1-R2(NS)||R0 = [P5+24]; 
    CC = R0