推 lgen7604:光看這段看不出有什麼問題 我猜大概是跟dimension有關 02/21 20:24
大家好,
請教一下關於CUDA中寫CUFFT,目前參考了板上的高手寫了一個FFTSHIFT,程式如下
float tmp13x, tmp13y;
float tmp24x, tmp24y;
int m2 = NNx/2, n2=NNy/2;
for (int i=0; i <m2; i++)
{
for (int j=0; j <n2; j++)
{
tmp13x = xx[ i*NNx+j ].x;
xx[ i*NNx+j ].x = xx[ (i+m2)*NNx+(j+n2) ].x;
xx[ (i+m2)*NNx+(j+n2) ].x = tmp13x;
tmp13y = xx[ i*NNx+j ].y;
xx[ i*NNx+j ].y = xx[ (i+m2)*NNx+(j+n2) ].y;
xx[ (i+m2)*NNx+(j+n2) ].y = tmp13y;
tmp24x = xx[ (i+m2)*NNx+j ].x;
xx[ (i+m2)*NNx+j ].x = xx[ i*NNx+(j+n2) ].x;
xx[ i*NNx+(j+n2) ].x = tmp24x;
tmp24y = xx[ (i+m2)*NNx+j ].y;
xx[ (i+m2)*NNx+j ].y = xx[ i*NNx+(j+n2) ].y;
xx[ i*NNx+(j+n2) ].y = tmp24y;
}
}
那我今天想改成用CUDA來執行, 如下
__global__ void GFFTSHIFT( cuComplex *xx, float *d_temp13x, float *d_temp13y,
float *d_temp24x, float *d_temp24y )
{
int col = blockIdx.x*blockDim.x+threadIdx.x;
int row = blockIdx.y*blockDim.y+threadIdx.y;
int m2 = NNx/2, n2=NNy/2;
int col2 = (blockIdx.x)*blockDim.x+threadIdx.x+m2;
int row2 = (blockIdx.y)*blockDim.y+threadIdx.y+n2;
d_temp13x[ row*NNx+col ] = xx[ row*NNx+col ].x;
xx[ row*NNx+col ].x = xx[ row2*NNx+col2 ].x;
xx[ row2*NNx+col2 ].x = d_temp13x[ row*NNx+col ];
d_temp13y[ row*NNx+col ] = xx[ row*NNx+col ].y;
xx[ row*NNx+col ].y = xx[ row2*NNx+col2 ].y;
xx[ row2*NNx+col2 ].y = d_temp13y[ row*NNx+col ];
d_temp24x[ row*NNx+col ] = xx[ row2*NNx+col ].x;
xx[ row2*NNx+col ].x = xx[ row*NNx+col2 ].x;
xx[ row*NNx+col2 ].x = d_temp24x[ row*NNx+col ];
d_temp24y[ row*NNx+col ] = xx[ row2*NNx+col ].y;
xx[ row2*NNx+col ].y = xx[ row*NNx+col2 ].y;
xx[ row*NNx+col2 ].y = d_temp24y[ row*NNx+col ];
}
他解出來的圖殘破不堪我只是把原先在的改成col 與row來表示,
想請板上大大指導一下,謝謝
--
※ 發信站: 批踢踢實業坊(ptt.cc)
◆ From: 140.122.192.147