使用 SSE 的 _mm_shuffle_ps 时出现段错误(segmentation fault)

时间:2017-08-31 07:40:02

标签: c++ sse

我在程序中使用 SSE 指令来提升性能,但是在调用 _mm_shuffle_ps 时经常会崩溃。

我知道这很可能是因为数据需要按 16 字节对齐,但我一直无法解决这个问题。

这是我使用的代码(我的程序使用 Visual Studio 2017 以 32 位模式编译):

// Packs four 2-bit lane selectors into the 8-bit immediate taken by _mm_shuffle_ps.
// LANE0 lands in bits 1:0, LANE1 in bits 3:2, LANE2 in bits 5:4 and LANE3 in bits 7:6,
// i.e. the arguments are listed from the lowest output lane to the highest.
#define SHUFFLEMASK(LANE0,LANE1,LANE2,LANE3) ( ((LANE3)<<6) | ((LANE2)<<4) | ((LANE1)<<2) | (LANE0) )

// Rotates `vector` by `quaternion`, going by the function name; the actual rotation
// math is elided from this excerpt, so only the first step (the lane broadcast) is
// visible here.
//
// NOTE(review): both operands arrive by const reference rather than by value, so the
// compiler may read them straight from the caller's memory. If the referenced __m128
// lives at an address that is not 16-byte aligned, an aligned SSE load would fault —
// worth confirming the alignment of whatever the caller passes in.
inline __m128 RotateVector(const __m128& quaternion, const __m128& vector)
{
    // Every selector is 3, so each output lane picks element 3 of its source operand.
    const uint32 shuffleMask = SHUFFLEMASK(3, 3, 3, 3);

    // THE NEXT LINE IS THE ONE CRASHING
    // Copies element 3 of `quaternion` into all four lanes (presumably the quaternion's
    // w component, judging by the name `qw` — confirm against the storage convention).
    const __m128 qw = _mm_shuffle_ps(quaternion, quaternion, shuffleMask);

    // The rest isn't useful since it crashes before even getting there
    ...
}

// Builds an SSE register from four scalars, with X in lane 0 (lowest) through W in
// lane 3 (highest).
inline __m128 MakeVectorRegister(float X, float Y, float Z, float W)
{
    // _mm_set_ps lists its arguments from the highest lane down to the lowest,
    // so the components are passed in reverse to keep X in lane 0.
    const __m128 packed = _mm_set_ps(W, Z, Y, X);
    return packed;
}

// Three-component float vector. The anonymous union overlays the array `vec` and the
// named fields x, y, z on the same storage, so either spelling can be used to reach
// the components.
// NOTE(review): an anonymous struct inside a union is a compiler extension, not
// standard C++. The data members are plain floats, so nothing here requests 16-byte
// alignment for a Vertex.
class Vertex
{
public:
    union
    {
        // Array view of the three components.
        float vec[3];
        // Named view of the same three components.
        struct
        {
            float x, y, z;
        };
    };

    // Rest of class (only methods, no other attributes)
    ...
};

// Transform that stores a scale and a rotation as SSE registers and applies them to
// Vertex values.
//
// NOTE(review): the alignment request only governs objects with static or automatic
// storage. In a 32-bit MSVC build, instances created with plain `new`/`malloc` (or
// embedded in storage that is itself under-aligned) are not guaranteed to sit on a
// 16-byte boundary, which would make aligned loads of _scale/_rotation fault. Verify
// how instances of X are allocated.
__declspec(align(16)) class X
{
    ...

    __m128 _scale;     // scale factors, multiplied lane-by-lane with the input vector
    __m128 _rotation;  // rotation handed to RotateVector as its quaternion argument

    ...

    // Scales `vector` by _scale and then rotates the result by _rotation.
    // The remainder of the function (including the value returned) is elided from
    // this excerpt.
    Vertex TransformVector(const Vertex& vector) const
    {
        // Stage the three components in a 16-byte aligned local buffer.
        float __declspec(align(16)) vectorData[3];
        memcpy(vectorData, &vector.x, sizeof(float) * 3);

        // The next line was originally this: const __m128 inputVectorW0 = MakeVectorRegister(((const float*)(&vector.x))[0], ((const float*)(&vector.x))[1], ((const float*)(&vector.x))[2], 0.0f)
        // Lanes 0..2 carry x, y, z; lane 3 (W) is forced to zero.
        const __m128 inputVectorW0 = MakeVectorRegister(vectorData[0], vectorData[1], vectorData[2], 0.0f);

        const __m128 scaledVec = _mm_mul_ps(_scale, inputVectorW0);
        const __m128 rotatedVec = RotateVector(_rotation, scaledVec);

        // The rest isn't useful since it crashes before
        ...
    }
};

// Example of usage
int main(...)
{
    Vertex input;
    X transform;

    // Reported crash site: this call reaches RotateVector, where the
    // _mm_shuffle_ps invocation faults.
    Vertex result = transform.TransformVector(input);
}

0 个答案:

没有答案