www.pudn.com > bf533_encoder_mpeg4.rar > arith_encoder_mpeg4.asm


/******************************************************************************* 
Copyright(c) 2000 - 2002 Analog Devices. All Rights Reserved. 
Developed by Joint Development Software Application Team, IPDC, Bangalore, India 
for Blackfin DSPs  ( Micro Signal Architecture 1.0 specification). 
 
By using this module you agree to the terms of the Analog Devices License 
Agreement for DSP Software.  
******************************************************************************** 
Module Name     : arith_encoder_mpeg4.asm 
Label Name      : __arith_encoder_mpeg4 
Version         :   2.0 
Change History  : 
 
                Version     Date          Author        Comments 
                2.0	    01/09/2007    Arjun	        Tested with VDSP++4.5 
							Compiler 7.2.3.2 
		1.3         11/18/2002    Swarnalatha   Tested with VDSP++ 3.0 
                                                        compiler 6.2.2 on  
                                                        ADSP-21535 Rev.0.2 
                1.2         11/13/2002    Swarnalatha   Tested with VDSP++ 3.0 
                                                        on ADSP-21535 Rev.0.2                                              
                1.1         03/20/2002    Raghavendra   Modified to match 
                                                        silicon cycle count 
                1.0         09/11/2001    Raghavendra   Original  
 
 
Description     : The  arithmetic coding works on the basis of recursive  
                  probability interval sub division. With each binary decision  
                  the current probability interval is subdivided into two  
                  subintervals and  code string is modified so that it points to 
                  base of the probability subinterval. Arithmetic encoder uses  
                  different  probability tables for INTRA and NON-INTRA  
                  contexts. These tables contain the probabilities for a binary  
                  alpha pixel being equal to 0 for intra and inter shape coding. 
                  The least probable symbol LPS is defined as the symbol with  
                  least probability. If both probabilities are equal to  
                  half (i.e 0x8000), then '0' symbol is considered as least  
                  probable. 
                  During initialization the lower bound L is set to zero and the
                  range register R is set to 0x7fffffff. In order to avoid start
                  code emulation, 1's are stuffed into the bitstream whenever
                  there are too many successive 0's. If the first 3 (MAX_HEADING)
                  bits are 0's, then a 1 is transmitted after the MAX_HEADING-th
                  0. If 10 (MAX_MIDDLE) or more 0's are sent successively, a 1 is
                  inserted after the MAX_MIDDLE-th 0. If the number of trailing
                  0's is larger than 2 (MAX_TRAILING), then a 1 is appended.
                  The range associated with least probable symbol(LPS) is simply 
                  computed as R*pLPS.  
                  where R ->16 most significant bits of Range register  
                     pLPS -> probability of LPS symbol. 
                  If the value of R is less than QUARTER (i.e. 0x40000000), then
                  re-normalisation is performed. In this procedure both the lower
                  value L and the range R are doubled until R is no longer less
                  than QUARTER.
           
 
                  The following structure is used :  
                  struct arcodec { 
                  UInt L;              // lower bound 
                  UInt R;              // code range 
                  UInt V;              // current code value 
                  UInt arpipe; 
                  Int bits_to_follow;  // follow bit count 
                  Int first_bit;       // flag to check first bit 
                  Int nzeros;          // counter to count consecutive zeros 
                  Int nonzero; 
                  Int nzerosf; 
                  Int extrabits; 
                  Int mh;              // MAX_HEAD 
                  Int mm;              // MAX_MIDDLE 
                  Int mt;              // MAX_TRAIL 
                  unsigned char *out;  // output array 
                                 };typedef struct arcodec ArCoder;    
 
                  This structure is common for both encoder and decoder. 
                                                                             
Assumption      : Input bits are stored in an unsigned character array and each  
                  bit is stored in one location. Similarly  each output bit is  
                  written to an unsigned character location. 
 
Prototype       : void StartArCoder(ArCoder * coder, unsigned char *); 
                  void arith_encoder_mpeg4(int bit,int C0, ArCoder * coder);
                  void arith_encode_renormalise(ArCoder  *coder); 
                  void arith_bitplusfollow(int bit, ArCoder *coder); 
                  void arith_StopArCoder(ArCoder *coder); 
 
Calling sequence: The arithmetic encoder is initialised by calling the
                  _StartArCoder function. The _arith_encoder_mpeg4 function is
                  then called for each context with a decision bit.
                  Finally the _arith_StopArCoder function is called. The
                  following C code explains the calling sequence.
                      
                  main() 
                   {  
                      int i,j,C,D; 
                      struct ArCoder coder; 
                        :                                              
                        : 
                        : 
                        _StartArCoder(coder,&bit_output[0]) ; 
                            // bit_output is address of output array  
                        for(i=0;imm to MAX_HEADER value i.e 3 
    [P0+44] = R2;           // to keep track of maximum zeros in middle of bit  
                            // stream 
    R2 = MAXTRAILING_ER; 
    [P0+48] = R2;           // store coder->mt as MAX_TRAILING value i.e 2 
    [P0+52] = R1;           // address of output array 
    RTS; 
    NOP; 
     
__StartArCoder.end:  
     
 
/******************************************************************************
Prototype : void arith_encoder_mpeg4(int bit,int C0, ArCoder * coder);

            bit   -> decision bit which is to be coded            (passed in R0)
            C0    -> probability of symbol '0'                    (passed in R1)
            coder -> address of the ArCoder structure variable    (passed in R2)

In this procedure the probability of symbol '1' is calculated from the
probability of symbol '0' as (0x10000 - C0). If the probability of symbol '1'
is less than the probability of symbol '0' then '1' is treated as the LPS,
otherwise (including the case of equal probabilities) '0' is treated as the
LPS. The range of the LPS symbol (rLPS) is calculated by multiplying the higher
16 bits of the range register (R) by the probability of the LPS (CLPS).
If the bit to be coded equals the LPS then the lower register is updated as
coder->L += coder->R - rLPS and the new range value (coder->R) becomes rLPS.
Otherwise the range is decremented by rLPS and coder->L is left unchanged.
Finally __arith_encode_renormalise is called, which doubles both L and R while
R is less than QUARTER (i.e. 0x40000000).

NOTE(review): the compare / conditional-move / shift sequence that selects the
LPS, CLPS and the upper half of R was reconstructed from the description above;
confirm it against the original Analog Devices distribution.

Registers used : R0-R3, R5-R7, P0, P1.
*******************************************************************************/
.section               program;
.global        __arith_encoder_mpeg4;
.align                      8;

__arith_encoder_mpeg4:

    P0 = R2;                // Address of structure coder
    [--SP] = (R7:5);        // Push R7:5 (restored before returning)
    P1 = R2;                // Duplicate the address of coder for the
                            // renormalise call below
    R2 = R0-R0(NS)||R6 = [P0++];
                            // R2 = 0, R6 = coder->L, P0 now points to coder->R
    BITSET(R2,16);          // set R2 = 0x10000
    R2 = R2-R1(NS)||R7 = [P0];
                            // R1 = probability of '0', R2 = probability of '1'
                            // and R7 = coder->R (range value)
    CC = R2 < R1;           // CC = 1 when '1' is less probable than '0'
    IF CC R1 = R2;          // R1 = probability of the LPS (CLPS)
    R5 = R7>>16;            // Higher 16 bits of range register
    R3 = CC;                // R3 contains Least probable symbol
                            // (the shift above leaves CC untouched)
    R5 = R5.L*R1.L(FU);     // Range LPS(rLPS) =  R * CLPS
    R7 = R7-R5;             // Range = Range - range of LPS
    R2 = R6+R7;             // Lower value  +=  Range - range of LPS
    CC = R0 == R3;          // check if bit to be coded  == LPS
    IF CC R7 = R5;          // if true Range =  range of LPS
    IF !CC R2 = R6;         // if false lower value  =  previous lower value
    [P0--] = R7;            // store new Range value in coder->R
    [P0] = R2;              // store coder->L
    [--SP] = RETS;          // Push RETS register before calling a function
    R0 = P1;                // argument to function (coder)

    CALL __arith_encode_renormalise;
    RETS = [SP++];          // Pop RETS register
    (R7:5) = [SP++];        // Pop R7:5

    RTS;
    NOP;

__arith_encoder_mpeg4.end:
        
/******************************************************************************
Prototype   :   void  arith_encode_renormalise(ArCoder  *coder);

                coder -> address of the ArCoder structure (passed in R0)

Renormalization is done only while the range value (coder->R) is less than
QUARTER (0x40000000). On every iteration:
  - if the lower value (coder->L) is greater than or equal to HALF (0x80000000)
    then '1' is written to the bitstream and the lower value is decremented by
    HALF;
  - else if the sum of lower value and range value (coder->L + coder->R) is
    less than or equal to HALF then '0' is written to the bitstream;
  - otherwise the lower value (coder->L) is decremented by QUARTER and
    coder->bits_to_follow is incremented.
Afterwards both coder->L and coder->R are doubled. This procedure is repeated
until the range value (coder->R) is no longer less than QUARTER.

All comparisons are unsigned because HALF (0x80000000) has the sign bit set.

NOTE(review): the two compares at CHK_WHILE were reconstructed from the
description above (operands had been lost); confirm against the original
Analog Devices distribution.

Registers used : R0-R3, R6, R7, P0, P5 (P1 is additionally modified by the
                 called __arith_bitplusfollow).
*******************************************************************************/
.section                            program;
.global           __arith_encode_renormalise;
.align                                    8;

__arith_encode_renormalise:

    P0 = R0;                // Address of coder
    [--SP] = (R7:6,P5:5);   // Push R7:6,P5
    P5 = R0;                // Duplicate the address of coder (survives calls)
    R7 = R1-R1(NS)||R0 = [P0++];
                            // R7 = 0, fetch coder->L
    R6 = R0-R0(NS)||R1 = [P0--];
                            // R6 = 0, fetch coder->R, P0 points to coder->L
    BITSET(R7,31);          // set r7 == 0x80000000 (HALF)
    BITSET(R6,30);          // set r6 == 0x40000000 (QUARTER)

CHK_WHILE:
    CC = R1 < R6(IU);       // check if coder->R < 0x40000000
    IF !CC JUMP NORME_END;  // if false jump to NORME_END
    CC = R7 <= R0(IU);      // check if coder->L >= 0x80000000
    IF !CC JUMP CHK_ELSE_IF;

    R0 = R0-R7;             // coder->L -= 0x80000000
    [P0] = R0;              // store coder->L value
    R0 = 1;                 // arguments to arith_bitplusfollow function
    R1 = P5;
    [--SP] = RETS;

    CALL __arith_bitplusfollow;
    RETS = [SP++];
    P0 = P5;                // P0 was changed by the callee, reload coder
    JUMP END_WHILE;

    CHK_ELSE_IF:  R2 = R0+R1;
    CC = R2 <= R7(IU);      // check if coder->L+coder->R  <= 0x80000000
    IF !CC JUMP CHK_ELSE;   // if true call arith_bitplusfollow with bit == 0
    R0 = 0;
    R1 = P5;
    [--SP] = RETS;

    CALL __arith_bitplusfollow;
    RETS = [SP++];
    P0 = P5;                // P0 was changed by the callee, reload coder
    JUMP END_WHILE;

CHK_ELSE:
    R2 = 1;                 // else increment bits_to_follow
    R0 = R0-R6(NS)||R1 = [P0+16];
                            // decrement coder->L by 0x40000000 and fetch
                            // coder->bits_to_follow
    R1 = R1+R2(NS)||[P0] = R0;
                            // store modified coder->L value
    [P0+16] = R1;           // store incremented bits_to_follow value

END_WHILE:
    R0 = [P0++];            // reload coder->L (it may have been updated above)
    R0 = R0<<1||R1 = [P0--];// double coder->L, reload coder->R
    R1 = R1<<1||[P0++] = R0;// double coder->R, store coder->L
    [P0--] = R1;            // store coder->R, P0 points to coder->L again
    JUMP CHK_WHILE;         // repeat the procedure while coder->R < 0x40000000
    NOP;                    // to remove stalls on the silicon

NORME_END:
    (R7:6,P5:5) = [SP++];
    RTS;
    NOP;

__arith_encode_renormalise.end:
 
/*******************************************************************************
Prototype : void arith_bitplusfollow(int bit, ArCoder *coder);

            bit   -> bit to be written                      (passed in R0)
            coder -> address of the ArCoder structure       (passed in R1)

This function writes a bit to the bit-stream (one output byte per bit, through
the pointer held in coder->out) and then writes the opposite bit once for every
pending coder->bits_to_follow. If coder->first_bit is set, the bit itself is
not written; the flag is only cleared. In order to avoid start code emulation
1's are stuffed whenever there are too many successive '0's: coder->nzeros is
decremented on every '0' written, a '1' is inserted when it reaches zero, and
it is reloaded from coder->mm (MAX_MIDDLE) after every '1' written or stuffed.
On exit the advanced output pointer is stored back to coder->out.

Structure offsets used: +16 bits_to_follow, +20 first_bit, +24 nzeros,
                        +28 nonzero, +44 mm, +52 out.

Registers used : R0-R3, R7, P0, P1.
******************************************************************************/ 
.section                       program; 
.global          __arith_bitplusfollow; 
.align                               8; 
     
__arith_bitplusfollow: 
     
    P0 = R1;                // Address of coder 
    [--SP] = R7;            // Push R7 register 
    R1 = 1;                 // constant 1: stuffed bit, flag value, decrement
    R7 = R0 <<0 || P1 = [P0+52]; 
                            // R7 = copy of bit, P1 = coder->out (address of
                            // the next output byte)
    R2 = R1-R1(NS) || R3 = [P0+20]; 
                            // R2 = 0, fetch value of first_bit flag.  
    CC = R3 == 0;           // check if first_bit flag  == 0 
    IF !CC JUMP CLR_FIRST_BIT; 
                            // if false jump to CLR_FIRST_BIT  
    B[P1++] = R0;           // store bit value to output buffer 
    CC = R0 == 0;           // check if bit value == 0 
    IF !CC JUMP CHK_ELSE_COND; 
                            // a non-zero bit only resets the zero counter
    R2 = [P0+24];           // get value of nzeros 
    R2 += -1;               // decrement the value of nzeros 
    CC = R2 == 0;           // if nzeros == 0, stuff '1'. 
    IF CC JUMP CHK_ELSE_1; 
    [P0+24] = R2;           // otherwise store the decremented nzeros
    JUMP CHK_WHILE_LOOP; 
 
CHK_ELSE_1: 
    B[P1++] = R1; 
                            // stuff  '1' to output buffer  
CHK_ELSE_COND: 
    [P0+28] = R1;           // coder->nonzero = 1
    R3 = [P0+44];           // fetch coder->mm (MAX_MIDDLE, i.e 10)
    [P0+24] = R3;           // set coder->nzeros to coder->mm
    JUMP CHK_WHILE_LOOP; 
     
CLR_FIRST_BIT: 
    [P0+20] = R2;           // clear first_bit flag (R2 == 0 here)
 
// The bit itself has now been handled.
// If coder->bits_to_follow is a non zero value then the !bit value is added to
// the bitstream and coder->bits_to_follow is decremented. This process is
// continued till coder->bits_to_follow becomes zero.
 
CHK_WHILE_LOOP: 
    BITTGL(R7,0);           // R7 = bit with bit 0 toggled (!bit for 0/1 input)
    R2 = [P0+16];           // get value of coder->bits_to_follow 
TEST_BACK: 
    CC = R2 <= 0; 
                            // check if coder->bits_to_follow <= 0  
    IF CC JUMP END_WHILE_LOOP1(bp); 
    R2 = R2-R1(ns)||B[P1++] = R7; 
                            // if false !bit is stored as output and the
                            // follow count in R2 is decremented
    CC = R7 == 0;           // stored bit is checked  for zero 
    IF !CC JUMP CHK_ELSE2;  // if true decrement the nzero counter 
    R0 = [P0+24];           // get value of nzeros
    R0 += -1;               // decrement it
    [P0+24] = R0;           // and store it back
    CC = R0 == 0;           // check if nzeros reached zero
    IF !CC JUMP TEST_BACK;  // if not, continue with the next follow bit
    B[P1++] = R1;           // else stuff '1' to output buffer
 
CHK_ELSE2: 
    [P0+28] = R1;           // coder->nonzero = 1
    R3 = [P0+44];           // fetch coder->mm
    [P0+24] = R3;           // set coder->nzeros to MAX_MIDDLE (i.e 10) 
    JUMP TEST_BACK;                     
END_WHILE_LOOP1: 
    [P0+16] = R2;           // store the remaining coder->bits_to_follow
END_WHILE_LOOP: 
    [P0+52] = P1; 
                            // store the present pointer of output address  
    R7 = [SP++];            // Pop R7 register 
     
    RTS; 
    NOP; 
__arith_bitplusfollow.end:     
 
/******************************************************************************* 
Prototype   : void  arith_StopArCoder(ArCoder *coder); 
 
In this procedure additional bits are copied to output bit stream which is  
required for proper decoding. If  2(MAX_TRAIL) or more zeroes are sent then '1'  
is inserted after MAX_TRAIL th '0'. 
 
Registers used : R0-R3, R5-R7, P0, P5. 
*******************************************************************************/ 
.section                          program; 
.global               __arith_StopArCoder; 
.align                                  8; 
     
__arith_StopArCoder: 
    P0 = R0; 
                            // Address of coder  
    [--SP] = (R7:5,P5:5);   // Push R7:5,P5 and RETS register 
    [--SP] = RETS;  
    P5 = R0;                // store the address in P5 
    R0 = [P0];              // get value of coder->L 
    R3 = 8;     
    R7 = R0>>29||R2 = [P0+4]; 
                            // fetch value of coder->R  
    R6 = R0+R2;             
    R6 = R6>>29;              
    CC = R6 == 0; 
    IF CC R6 = R3; 
    R3 = R6-R7; 
    CC = R3 <= 3; 
    IF !CC JUMP BIT_EQ_2; 
    CC = BITTST(R7,0);      // conditional check to find how many 
                            // bits are left 
                            //r1 = cycles; 
    R0 = CC; 
    CC = R3 == 3; 
    R3 = CC; 
    R6 = 3; 
                            //r2 = cycles; 
    R3 = R3&R0; 
    CC = R3 == 1; 
    IF CC JUMP BIT_EQ_2; 
    R7 += 1; 
    JUMP FOR_LOOP; 
 
BIT_EQ_2: 
    R7 >>= 1; 
    R7 += 1; 
    R6 = 2; 
FOR_LOOP: 
    R5 = -R6; 
    R5 += 1; 
JUMP_BACK: 
    CC = R6 == 0; 
    IF CC JUMP END_FORLOOP; 
    R0 = LSHIFT R7 BY R5.L; 
    R2 = 1; 
    R0 = R0&R2; 
    R1 = P5;                // call arith_bitplusfollow to write bits to  
                            //bit-stream 
    CALL __arith_bitplusfollow; 
    R6 += -1;               // decrement the counter 
    R5 += 1; 
    JUMP JUMP_BACK; 
END_FORLOOP: 
    R0 = [P5+28]; 
                            // get value of coder->nzeros  
    CC = R0 == 0;               
    IF CC JUMP CALL_BITPLUS; 
    R1 = [P5+44]; 
    R2 = [P5+48]; 
    R1 = R1-R2(NS)||R0 = [P5+24]; 
    CC = R0