opencl吧 关注:530 贴子:1,575
  • 3回复贴,共1

请教:关于矩阵转置代码的疑惑

只看楼主收藏回复

大家好:
AMD APP SDK安装包里面有个矩阵转置的例子,代码如下:
// Transposes a matrix of floats stored as float4 vectors. Each work-group
// copies one tile from input into local memory, synchronizes, and then writes
// the transposed tile to output.
//
// Each work-item handles a 4x4 block of floats: it reads one float4 from each
// of four consecutive input rows and writes one float4 to each of four
// consecutive output rows.
//
// output: destination buffer in global memory; indexed with the same row
//         stride (get_global_size(0) float4 elements) as input.
// input:  source buffer in global memory; only read by this kernel.
// block:  local-memory scratch tile. With get_local_size(1) equal to
//         get_local_size(0) the highest index touched is
//         4*blockSize*blockSize - 1, so it needs at least
//         4*blockSize*blockSize float4 elements.
//
// NOTE(review): the indexing uses get_local_size(0) for both local dimensions
// and the same row stride for input and output, so it looks like this expects
// square work-groups and a square matrix -- confirm against the host code.
__kernel
void matrixTranspose(__global float4 * output,
__global float4 * input,
__local float4 * block
)
{
// Row stride of the matrix in float4 elements (one work-item per float4 column).
uint wiWidth = get_global_size(0);
// Work-group ids as reported by the runtime; they are remapped just below.
uint gix_t = get_group_id(0);
uint giy_t = get_group_id(1);
uint num_of_blocks_x = get_num_groups(0);
// break memory banks dependency by "reshuffling" global indeces
// The tile handled by this group is not (gix_t, giy_t): the tile row is the
// group's x id, and the tile column is (x id + y id) modulo the number of
// groups in x. For a fixed y id, groups with consecutive x ids therefore work
// on tiles along a wrapped diagonal instead of along a single tile row.
// gix and giy are only used to build index_in and index_out, so the remapping
// changes which global addresses a group touches; the local-memory index ind
// does not depend on it.
// NOTE(review): presumably the diagonal order spreads the global-memory
// accesses of groups that run at the same time over different memory
// banks/channels -- confirm against the vendor optimization guide.
// Both remapped coordinates range over [0, num_of_blocks_x), so every tile is
// covered exactly once only when get_num_groups(1) == get_num_groups(0).
uint giy = gix_t;
uint gix = (gix_t+giy_t)%num_of_blocks_x;
uint lix = get_local_id(0);
uint liy = get_local_id(1);
// Work-group size in dimension 0; also used as the tile extent in dimension 1.
uint blockSize = get_local_size(0);
// Source position: ix is the float4 column, iy the work-item row. Each
// work-item row spans four float rows, hence the factor 4 in index_in.
uint ix = gix*blockSize + lix;
uint iy = giy*blockSize + liy;
int index_in = ix + (iy)*wiWidth*4;
// coalesced copy from input global memory into LDS
// The local tile holds blockSize float4 per row; this work-item fills column
// lix of tile rows 4*liy .. 4*liy+3 from four consecutive input rows.
int ind = liy*blockSize*4+lix;
block[ind] = input[index_in];
block[ind+blockSize] = input[index_in+wiWidth];
block[ind+blockSize*2] = input[index_in+wiWidth*2];
block[ind+blockSize*3] = input[index_in+wiWidth*3];
// wait until the whole block is filled
// (the loads below read tile entries stored by other work-items of the group)
barrier(CLK_LOCAL_MEM_FENCE);
// calculate the corresponding target
// as location inside block of transposed location
// The group coordinates are swapped relative to the load, so the input tile at
// (column gix, row giy) is written to the output tile at (column giy, row gix).
ix = giy*blockSize + lix;
iy = gix*blockSize + liy;
// lix and liy also swap roles in the local index: this work-item picks up the
// four float4 values that work-item (liy, lix) stored before the barrier.
int index_out = ix + (iy)*wiWidth*4; ind = lix*blockSize*4+liy;
float4 v0 = block[ind];
float4 v1 = block[ind+blockSize];
float4 v2 = block[ind+blockSize*2];
float4 v3 = block[ind+blockSize*3];
// coalesced copy of transposed data in LDS into output global memory
// Transpose the 4x4 float block on the way out: output row k gathers
// component k (x, y, z, w) of v0..v3.
output[index_out] = (float4)(v0.x, v1.x, v2.x, v3.x);
output[index_out+wiWidth] = (float4)(v0.y, v1.y, v2.y, v3.y);
output[index_out+wiWidth*2] = (float4)(v0.z, v1.z, v2.z, v3.z);
output[index_out+wiWidth*3] = (float4)(v0.w, v1.w, v2.w, v3.w);
}
其中:
// break memory banks dependency by "reshuffling" global indeces
uint giy = gix_t;
uint gix = (gix_t+giy_t)%num_of_blocks_x;
这两句 到底是怎么处理bank conflict的呢?怎么都想不明白啊,哪位可以帮忙解答下啊?
多谢了啊!


IP属地:北京 1楼 2013-04-11 18:32 回复
    这个例子还没看...帮顶下...
    解决不了找吧主,夜恋H诗,别说我叫的哈,怕他削我...


    IP属地:吉林 2楼 2013-04-12 14:54
    回复
      求大神解答


      IP属地:浙江 来自iPad 3楼 2015-01-13 20:01
      回复
        LZ 楼主有答案了吗? 同求!!!!!


        5楼 2018-07-10 12:54
        回复