avx2读取3通道图像数据并按照通道分配给对应数据

2022-04-24  本文已影响0人  寽虎非虫003

需求

需求的来源是这样的:对三通道图像的像素值做线性运算时,直接调用 OpenCV 提供的函数很慢,尤其是通道拆分这一步。而 SSE/AVX 这类 SIMD 指令从连续内存中 load 数据比较快,用 set 逐个填入特定数据则比较慢,官方也推荐先 load,再用 shuffle 之类的指令进行顺序变换。

内存变换图

从3通道图像中加载数据.png

当内存成功分成bgr后就方便做后续运算了。

代码

    // De-interleave a 48-byte window starting at Src + i into three 16-byte
    // vectors: dataB receives the bytes at window offsets 0, 3, 6, ...,
    // dataG those at 1, 4, 7, ..., and dataR those at 2, 5, 8, ...
    // NOTE(review): the B/G/R naming presumes the source is stored in
    // B,G,R byte order - confirm against the caller.

    // Three unaligned 16-byte loads covering the whole window.
    const __m128i chunkLo  = _mm_loadu_si128((__m128i *)(Src + i));
    const __m128i chunkMid = _mm_loadu_si128((__m128i *)(Src + i + 16));
    const __m128i chunkHi  = _mm_loadu_si128((__m128i *)(Src + i + 32));

    // Shuffle controls for _mm_shuffle_epi8, listed in lane order (lane 0
    // first). A non-negative entry is the index of the chunk byte copied into
    // that lane; -1 zeroes the lane, so the three partial results of one
    // channel occupy disjoint lanes and can simply be OR-ed together.

    // First channel: lanes 0-5 from chunkLo, 6-10 from chunkMid, 11-15 from chunkHi.
    const __m128i pickB_lo  = _mm_setr_epi8(0, 3, 6, 9, 12, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
    const __m128i pickB_mid = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, 2, 5, 8, 11, 14, -1, -1, -1, -1, -1);
    const __m128i pickB_hi  = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 4, 7, 10, 13);

    // Second channel: lanes 0-4 from chunkLo, 5-10 from chunkMid, 11-15 from chunkHi.
    const __m128i pickG_lo  = _mm_setr_epi8(1, 4, 7, 10, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
    const __m128i pickG_mid = _mm_setr_epi8(-1, -1, -1, -1, -1, 0, 3, 6, 9, 12, 15, -1, -1, -1, -1, -1);
    const __m128i pickG_hi  = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 2, 5, 8, 11, 14);

    // Third channel: lanes 0-4 from chunkLo, 5-9 from chunkMid, 10-15 from chunkHi.
    const __m128i pickR_lo  = _mm_setr_epi8(2, 5, 8, 11, 14, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
    const __m128i pickR_mid = _mm_setr_epi8(-1, -1, -1, -1, -1, 1, 4, 7, 10, 13, -1, -1, -1, -1, -1, -1);
    const __m128i pickR_hi  = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 3, 6, 9, 12, 15);

    // Gather each channel's bytes from the three chunks and merge them.
    __m128i dataB = _mm_or_si128(_mm_shuffle_epi8(chunkLo, pickB_lo),
                                 _mm_or_si128(_mm_shuffle_epi8(chunkMid, pickB_mid),
                                              _mm_shuffle_epi8(chunkHi, pickB_hi)));
    __m128i dataG = _mm_or_si128(_mm_shuffle_epi8(chunkLo, pickG_lo),
                                 _mm_or_si128(_mm_shuffle_epi8(chunkMid, pickG_mid),
                                              _mm_shuffle_epi8(chunkHi, pickG_hi)));
    __m128i dataR = _mm_or_si128(_mm_shuffle_epi8(chunkLo, pickR_lo),
                                 _mm_or_si128(_mm_shuffle_epi8(chunkMid, pickR_mid),
                                              _mm_shuffle_epi8(chunkHi, pickR_hi)));
上一篇 下一篇

猜你喜欢

热点阅读