int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1, k1_2=k1>>1; int rlim = n >> (l+4), r4, r,r2; int s2lim = 1 << (l+2), s2,s; for (r=r2=r4=0; r < rlim; r4+=4,++r,r2+=2) { for (s=s2=0; s2 < s2lim; s2+=2,++s) { int d0 = n2 - 1 - k0_2*s2-(r*2); int d2 = d0 - k0_2; d[d0 ] = e[d0 ] + e[d2 ]; d[d0-1] = e[d0-1] + e[d2-1]; d[d2 ] = (e[d0 ] - e[d2 ])*A[r*k1] - (e[d0-1] - e[d2-1]) * A[r*k1+1]; d[d2-1] = (e[d0-1] - e[d2-1])*A[r*k1] + (e[d0 ] - e[d2 ]) * A[r*k1+1]; } }