Thursday, September 16, 2010

Optimizing Vertex Formats

32 bytes per vertex is optimal for the hardware vertex cache.

We can safely pack normals and tangents into a uint32 each.

Also, I mainly use the second UV set for unique mappings / lightmaps, so the coords are guaranteed to lie within [0,1] - this allows us to use D3DDECLTYPE_SHORT2N (the unnormalized SHORT2 type is not supported on my trusty old ATi x700 mobility...). To convert your float UVs, you just multiply them by 32767.0f.

As a result we get a nice vertex format like this:

// size = 32 bytes
// Packed vertex layout. The field order must not change: it defines the byte
// offsets (0, 12, 16, 24, 28) that the matching vertex declaration refers to.
struct Vertex {
 FVec3 pos;   // position, offset 0
 VecU32 nrm;  // normal packed into four bytes, offset 12
 FVec2 uv;    // first UV set as floats, offset 16
        short uv2[2];  // second UV set as 16-bit shorts (float UVs in [0,1] scaled by 32767), offset 24
 VecU32 tan;  // tangent packed into four bytes, offset 28
};

// Direct3D 9 vertex declaration for the 32-byte packed vertex; every element
// lives in stream 0.
D3DVERTEXELEMENT9 declExt[] = {
 // stream, offset, type, method, usage, usageIndex
 // position: three floats at offset 0
 { 0, 0, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_POSITION, 0 },
 // normal: four unsigned bytes at offset 12; the shader remaps them with 2*x/255-1
 { 0, 12, D3DDECLTYPE_UBYTE4, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_NORMAL, 0 },
 // 2d uv: first UV set as two floats at offset 16
 { 0, 16, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 0 },
 // second UV set: two normalized signed shorts at offset 24
 { 0, 24, D3DDECLTYPE_SHORT2N, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 1 },
 // tangent: four unsigned bytes at offset 28, bound to TEXCOORD2 and decoded like the normal
 { 0, 28, D3DDECLTYPE_UBYTE4, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 2 },
 D3DDECL_END()
};

In your shader just use:

// Vertex shader input. The semantics correspond to the usage / usageIndex
// pairs of the vertex declaration (TEXCOORD2 carries the packed tangent).
struct VertexInput {
 float4 position : POSITION;
 float4 norm : NORMAL;  // compressed uint32: four byte components, decoded with 2*x/255-1
 float2 uv : TEXCOORD0;
 float2 uv2 : TEXCOORD1;
 float4 tangent : TEXCOORD2; // compressed uint32: four byte components, decoded with 2*x/255-1
};

// Vertex shader entry point (excerpt: "..." marks code omitted from the listing).
VertexOutput main(VertexInput IN) {
 ...
 // decompress normal & tangent: remap each byte component from [0,255] to [-1,1]
 // .xyz makes the float4 -> float3 narrowing explicit instead of relying on
 // implicit vector truncation (which the compiler warns about).
 float3 N = 2.0f*IN.norm.xyz/255.0f-1.0f;
 float4 T = 2.0f*IN.tangent/255.0f-1.0f;
 ...
}

Here is some C++ code to de-/compress your normals:

class VecU32 {
public:
 union {
  u8 dir[4];
  u32 vec32;
 };

 inline void compress(const FVec3& nrm) {
  dir[0] = (u8)((nrm.x * 0.5f + 0.5f) * 255.0f);
  dir[1] = (u8)((nrm.y * 0.5f + 0.5f) * 255.0f);
  dir[2] = (u8)((nrm.z * 0.5f + 0.5f) * 255.0f);
  dir[3] = 255;
 }

 inline void compress(const FVec4& nrm) {
  dir[0] = (u8)((nrm.x * 0.5f + 0.5f) * 255.0f);
  dir[1] = (u8)((nrm.y * 0.5f + 0.5f) * 255.0f);
  dir[2] = (u8)((nrm.z * 0.5f + 0.5f) * 255.0f);
  dir[3] = (u8)((nrm.w * 0.5f + 0.5f) * 255.0f);
 }

 inline FVec4 decompress() {
  return FVec4(
   2.0f*dir[0]/255.0f-1.0f,
   2.0f*dir[1]/255.0f-1.0f,
   2.0f*dir[2]/255.0f-1.0f,
   2.0f*dir[3]/255.0f-1.0f
   );
 }
};

1 comment: