Sunday, May 1, 2011

C & Assembly to Perform 16x16 Pixel-Block Motion Estimation

/*
 * Sum of absolute differences (SAD) between two 16x16 pixel blocks.
 * The running total is accumulated in val; the scan stops after any row
 * once val is greater than best_value.
 *
 * Pixels are read as unsigned char: the signedness of plain char is
 * implementation-defined, and a signed read would turn samples above 127
 * into negative values and corrupt the differences.
 *
 * val, data, i, j, best_value, rm and cm are declared outside this fragment;
 * data is assumed to be a signed type wide enough for -255..255 -- TODO confirm.
 */
unsigned char *bptr; /* pointer to start row of 16x16 pixel block being compressed */
unsigned char *cptr; /* pointer to start row of 16x16 pixel reference block */
val = 0;
for (i = 0; i < 16; i++) {
    for (j = 0; j < 16; j++) {
        /* Both operands promote to int, so the difference can be negative */
        data = (*(bptr++) - *(cptr++));
        if (data < 0) { val -= data; }
        else { val += data; }
    }
    /* Fast out after this row if best match has been exceeded */
    if (val > best_value) break;
    /*
     * The inner loop already advanced each pointer by 16, so adding
     * (width - 16) moves it to the start of the next row. This presumes
     * width is the row stride in pixels -- verify against rm/cm.
     */
    bptr += (rm->width - 16);
    cptr += (cm->width - 16);
}

Example 2. Inner-Core Absolute Differences for 16x16 Pixel Block
/*
 * Per-byte absolute differences of the 16 bytes at [m1] and [m2], followed
 * by a partial word-wise accumulation that leaves its result in xmm1.
 * xmm6 is used as the second unpack operand and is not set in this block;
 * presumably it holds zero so the unpack widens bytes to words -- TODO confirm.
 */
__asm {
   /* Unaligned 16-byte loads of the two rows being compared */
   movdqu xmm0, [m1]
   movdqu xmm1, [m2]
   /* Keep a copy of the m1 bytes; xmm0 is overwritten by the next subtract */
   movdqa xmm2, xmm0
   /* Unsigned saturating subtracts: xmm0 = max(m1 - m2, 0), xmm1 = max(m2 - m1, 0) */
   psubusb xmm0, xmm1
   psubusb xmm1, xmm2
   /* In each byte one of the two results is zero, so OR yields |m1 - m2| */
   por xmm0, xmm1
   movdqa xmm1, xmm0
   /* Interleave the low 8 difference bytes with the bytes of xmm6 */
   punpcklbw xmm0, xmm6
   /*
    * NOTE(review): this unpacks the same low 8 bytes again, and both this
    * xmm1 and the xmm3 copy below are overwritten by the pshufd instructions
    * before being read, so the upper 8 byte differences never reach the sum.
    * Looks like punpckhbw was intended here -- confirm.
    */
   punpcklbw xmm1, xmm6
   movdqa xmm3, xmm1
   /* 238 = 0xEE: copy the high quadword of xmm0 into both halves of xmm1 */
   pshufd xmm1, xmm0, 238
   /* 68 = 0x44: copy the low quadword of xmm0 into both halves of xmm3 */
   pshufd xmm3, xmm0, 68
   /* Word-wise add: each half of xmm1 now holds word[k] + word[k+4], k = 0..3 */
 paddw xmm1, xmm3
movdqa xmm4, xmm1
/* 78 = 0x4E: swap the two quadwords of xmm4 */
pshufd xmm4, xmm4, 78
   /*
    * NOTE(review): both halves of xmm1 are already identical at this point,
    * so adding the swapped copy doubles every word lane -- confirm intent.
    */
   paddw xmm1, xmm4

}