#dlang なぜ、どういう時にSOAを使うべきなのか【翻訳】

この記事は

を自分用に訳したものを許可を得て公開するものである。コードのコメントも一部翻訳されている。ところどころ訳が怪しいところがあるので、なにか見つけた人はcontributeだ!

SoAとは何か?

SoAとは単に配列の構造体(Structure of arrays)を意味します。

//AoS: 構造体の配列(Array of structures)
struct Vec2{
    float x;
    float y;
}
Array!Vec2 vectors;

//SoA: 配列の構造体
struct Vec2{
    float[] x;
    float[] y;
}

なぜSoAは便利なのか?

クライアント・サーバー・アーキテクチャーで小さなudpゲームサーバーを書くことを想像してみてください。あなたにはクライアントが接続できるサーバーがあります。 サーバーは現在接続しているクライアントたちを覚えておく必要があります。サーバーはメッセージをrecvfromでポールし、あなたがudpをよく知らない場合、 recvfromはソケットにバインドされたポートとアドレスへ送られるパケットを返します。

パケットが来たとき、最初に知りたいのはパケットが接続しているクライアントから来たかどうかかもしれません。あなたにはそれをこのように書く傾向があることでしょう:

struct Server{
    struct RemoteClient{
        Address address;
        SysTime lastReceivedPacket;
        //more data
    }
    Array!RemoteClient remoteClients;

    void poll(){
        //Address address
        //recvfrom(buffer, address);
    }
}

どのクライアントがパッケージを送ったのか知りたい時、正しいremoteClientを探すために単にremoteClients配列を使うことができます。問題はアドレスフィールドにしか興味がないが、RemoteClientをイテレートする必要があるということです。これはそれが必要ないとしても、lastReceivedPacketのような他のすべてのデータを不必要にロードしているということを意味します。

そして実際のアプリケーションでRemoteClientの中にどれくらいのデータがありうるか知りたいなら、こちらがEnet Peerの構造体です。これはPeerでありRemoteClientではないためフェアな比較ではないかもしれませんが、これはデータがかなり大きくなるかもしれないということを示しています。

typedef struct _ENetPeer
{ 
   ENetListNode  dispatchList;
   struct _ENetHost * host;
   enet_uint16   outgoingPeerID;
   enet_uint16   incomingPeerID;
   enet_uint32   connectID;
   enet_uint8    outgoingSessionID;
   enet_uint8    incomingSessionID;
   ENetAddress   address;            /**< Internet address of the peer */
   void *        data;               /**< Application private data, may be freely modified */
   ENetPeerState state;
   ENetChannel * channels;
   size_t        channelCount;       /**< Number of channels allocated for communication with peer */
   enet_uint32   incomingBandwidth;  /**< Downstream bandwidth of the client in bytes/second */
   enet_uint32   outgoingBandwidth;  /**< Upstream bandwidth of the client in bytes/second */
   enet_uint32   incomingBandwidthThrottleEpoch;
   enet_uint32   outgoingBandwidthThrottleEpoch;
   enet_uint32   incomingDataTotal;
   enet_uint32   outgoingDataTotal;
   enet_uint32   lastSendTime;
   enet_uint32   lastReceiveTime;
   enet_uint32   nextTimeout;
   enet_uint32   earliestTimeout;
   enet_uint32   packetLossEpoch;
   enet_uint32   packetsSent;
   enet_uint32   packetsLost;
   enet_uint32   packetLoss;          /**< mean packet loss of reliable packets as a ratio with respect to the constant ENET_PEER_PACKET_LOSS_SCALE */
   enet_uint32   packetLossVariance;
   enet_uint32   packetThrottle;
   enet_uint32   packetThrottleLimit;
   enet_uint32   packetThrottleCounter;
   enet_uint32   packetThrottleEpoch;
   enet_uint32   packetThrottleAcceleration;
   enet_uint32   packetThrottleDeceleration;
   enet_uint32   packetThrottleInterval;
   enet_uint32   pingInterval;
   enet_uint32   timeoutLimit;
   enet_uint32   timeoutMinimum;
   enet_uint32   timeoutMaximum;
   enet_uint32   lastRoundTripTime;
   enet_uint32   lowestRoundTripTime;
   enet_uint32   lastRoundTripTimeVariance;
   enet_uint32   highestRoundTripTimeVariance;
   enet_uint32   roundTripTime;
   enet_uint32   roundTripTimeVariance;
   enet_uint32   mtu;
   enet_uint32   windowSize;
   enet_uint32   reliableDataInTransit;
   enet_uint16   outgoingReliableSequenceNumber;
   ENetList      acknowledgements;
   ENetList      sentReliableCommands;
   ENetList      sentUnreliableCommands;
   ENetList      outgoingReliableCommands;
   ENetList      outgoingUnreliableCommands;
   ENetList      dispatchedCommands;
   int           needsDispatch;
   enet_uint16   incomingUnsequencedGroup;
   enet_uint16   outgoingUnsequencedGroup;
   enet_uint32   unsequencedWindow [ENET_PEER_UNSEQUENCED_WINDOW_SIZE / 32]; 
   enet_uint32   eventData;
   size_t        totalWaitingData;
} ENetPeer;

ではSoAではどのようになるか見てみましょう。

struct Server{
    struct RemoteClients{
        size_t length;
        Address[] address;
        SysTime[] lastReceivedPacket;
        //more data
    }
    RemoteClients remoteClients;

    void poll(){
        //Address address
        //recvfrom(buffer, address);
    }
}

すべてのremoteClients.addressのアドレスにアクセスでき、不必要なデータをキャッシュにロードする必要はありません。

SoAは使いにくくないのか?

多くの言語ではそのとおりです。

struct RemoteClients{
    size_t length;
    Address[] address;
    SysTime[] lastReceivedPacket;
    //more data
}

配列を割り当て、それが動的配列の場合は拡張しなければならないために定義は単純化されています。要素の挿入と削除についても気にかける必要もあります、 lastReceivedPacketを追加せずにアドレスだけをRemoteClientsに追加するようなことがあってはいけません。データはゆるく対になっているためです。以前のAoSの時はRemoteClientにremoteClients[index]でアクセスできましたが、今はRemoteClientにその要素remoteClients.addresses[index]やremoteClients.lastReceivedPacket[index]でアクセスします。

DでのSoAの実装

まずデモンストレーションから始めましょう。

struct Vec2{
    float x;
    float y;
}
auto s = SOA!(Vec2)();

s.insertBack(1.0f, 2.0f);
s.insertBack(Vec2(1.0, 2.0f));
writeln(s.x); // [1, 1]
writeln(s.y); // [2, 2]

データで構造体を作ることができ、SOAは構造体をみて内部的に適切な配列を作ります。持っているフィールドと同じ数の配列があるため、insertBackはちょっと普通の配列と異なります。これはinsertBackが可変長である必要があることを意味します。 insertBackはかわりにそれ自身の構造体も受け入れます。

以下のコードは実際に使うことを意図して書かれたコードではなく、単なる概念実証です。

struct SOA(T){
    import std.experimental.allocator;
    import std.experimental.allocator.mallocator;

    import std.meta: staticMap;
    import std.typecons: Tuple;
    import std.traits: FieldNameTuple;

    alias toArray(T) = T[];
    alias toType(string s) = typeof(__traits(getMember, T, s));

    alias MemberNames = FieldNameTuple!T;
    alias Types = staticMap!(toType, MemberNames);
    alias ArrayTypes = staticMap!(toArray, Types);

    this(size_t _size, IAllocator _alloc = allocatorObject(Mallocator.instance)){
        alloc = _alloc;
        size = _size;
        allocate(size);
    }

    ref auto opDispatch(string name)(){
        import std.meta: staticIndexOf;
        alias index = staticIndexOf!(name, MemberNames);
        static assert(index >= 0);
        return containers[index];
    }

    void insertBack(Types types){
        if(length == size) grow;
        foreach(index, ref container; containers){
            container[length] = types[index];
        }
        length = length + 1;
    }

    void insertBack(T t){
        if(length == size) grow;
        foreach(index, _; Types){
            containers[index][length] = __traits(getMember, t, MemberNames[index]);
        }
        length = length + 1;
    }

    size_t length() const @property{
        return _length;
    }

    ~this(){
        if(alloc is null) return;
        foreach(ref container; containers){
            alloc.dispose(container);
        }
    }

private:
    void length(size_t len)@property{
        _length = len;
    }

    Tuple!ArrayTypes containers;
    IAllocator alloc;

    size_t _length = 0;
    size_t size = 0;
    short growFactor = 2;

    void allocate(size_t size){
        if(alloc is null){
            alloc = allocatorObject(Mallocator.instance);
        }
        foreach(index, ref container; containers){
            container = alloc.makeArray!(Types[index])(size);
        }
    }

    void grow(){
        import std.algorithm: max;
        size_t newSize = max(1,size * growFactor);
        size_t expandSize = newSize - size;

        if(size is 0){
            allocate(newSize);
        }
        else{
            foreach(ref container; containers){
                alloc.expandArray(container, expandSize);
            }
        }
        size = newSize;
    }
}

alias toArray(T) = T[];
alias toType(string s) = typeof(__traits(getMember, T, s));

alias MemberNames = FieldNameTuple!T;
alias Types = staticMap!(toType, MemberNames);
alias ArrayTypes = staticMap!(toArray, Types);

MemberNamesは単なるフィールドの名前群です。たとえば構造体 Vec2{float x; float y}は型AliasSeq!("x", "y")になります。 toTypeはメンバ名をとり実際の型にします。例として上のtoType!("x")は型floatを返します。

staticMapの助けを得てメンバ名を実際の型に変換することができます。たとえば上のAliasSeq!("x", "y")はAliasSeq!(float, float)に変わります。

もうすぐ型を配列に変換する必要が出てきます。 AliasSeq!(float, float)をAliasSeq!(float[], float[])にです。それをtoArrayとstaticMapでします。

その後配列のタプルを作ることができます。

Tuple!ArrayTypes containers;

要素の挿入はかなり簡単です。

void insertBack()(Types types){
    if(length == size) grow;
    foreach(index, ref container; containers){
        container[length] = types[index];
    }
    length = length + 1;
}

insertBackが受け入れるべき型はすでにわかっています。構造体のフィールド郡の型をうけいれるべきです。そして配列のタプルであるcontainersをコンパイル時にイテレートします。

それから適切なargumentにtypes[index]でアクセスし、それを配列に挿入するだけです。

構造体そのものを挿入することもできます。

void insertBack(T t){
    if(length == size) grow;
    foreach(index, _; Types){
        containers[index][length] = __traits(getMember, t, MemberNames[index]);
    }
    length = length + 1;
}

indexを取得するために型をイテレートします。 indexを適切なコンテナを得るためと、構造体の適切なフィールド名を見つけるために使います。順序は常に同じためこれは動作します。

上のコードをVec2ようにするとだいたいこんなふうになります。

void insertBack(Vec2 t){
    if(length == size) grow;
    containers[0][length] = t.x;
    containers[1][length] = t.y;
    length = length + 1;
}

配列にフィールド名でアクセスできます。 DでこれはopDispatchによって非常に簡単にできます。

ref auto opDispatch(string name)(){
    import std.meta: staticIndexOf;
    alias index = staticIndexOf!(name, MemberNames);
    static assert(index >= 0);
    return containers[index];
}

上のサンプルをVec2用にした場合ではすべてのxの配列をs.x、すべてのyの配列をs.yで得ることができます。 s.xを呼んだ場合opDispatchはコンパイル時にこんな感じになります。

ref auto opDispatch(){
    import std.meta: staticIndexOf;
    alias index = staticIndexOf!("x", MemberNames);
    static assert(index >= 0);
    return containers[index];
}

MemberNames opDispatch内が失敗しなかった場合、opDispatch nameのインデックスをMemberNamesから取得します。 MemberNames内にあればそのインデックスで適切なコンテナにアクセスします。

struct Server{
    struct RemoteClients{
        Address address;
        SysTime lastReceivedPacket;
        //more data
    }
    SOA!RemoteClient remoteClients;

    void poll(){
        //Address address
        //recvfrom(buffer, address);
    }
}

SoAを使うときはいつか

まず第一にSoAは銀の弾丸ではなく、あなたのコードベースのすべてのAoSをSoAに置き換えるべきというわけではありません。

SoAに意味があるのはこのような時です:

データを1つの配列に保持したくなるとわかっているとき
部分的にデータにアクセスしたいとき

しかし時にはデータのすべてのコンポーネントにアクセスしたい時もあるでしょう。例としてベクトルが挙げられます。

struct Vec3{
    float x;
    float y;
    float z;
}

加算、減算、ドット積、距離その他いろいろのほとんどの操作はすべてのコンポーネントを使うでしょう。たとえ最終的に

struct Vec3{
    float x;
    float y;
    float z;
}

Array!Vec3 positions;

positions[].filter!(v => v.x < 10.0f);

x コンポーネントが10.0fより小さいすべてのベクトルをフィルタしたくなっても、2つのfloatが余分に読み込まれるだけです。 Vec3は大きくならず、これ以外の他のデータ構造が成長し、将来のボトルネックになるかもしれません。

SoAは「時期尚早な最適化」ではないのか?

私の意見は違います。 AoSの問題は、それが将来パフォーマンスボトルネックになった場合、多くのコードをリファクタしなければならないというところです。たとえばあなたはこのように構造体hotとcoldにデータをパックするかもしれません:

struct Bar{
    struct Hot{
        Data1 d1;
        Data2 d2;
        ...
    }
    struct Cold{
        Data3 d3;
        Data4 d4;
        ...
    }

    Hot* hot;
    Cold* cold;
}

しかし言語によってはまだまだ多くのコードをリファクタしなければならないかもしれません。早い段階でデータアクセスについて考えることで手間が省けるかもしれません。

Jonathan Blowには名無し変数とSoAをカバーする言語デモンストレーションがあります。 Quick note:Jonathan BlowのやりかたはDのalias thisとよく似ています。

使う言語によっては、SoAはAoSと比べてそんなに悪くはありません。

//AoS
remoteClients[index].address;

//vs 

//SoA
remoteClients.address[index];

しかしSoAはキャッシュからの関係ない不必要なローディングなしにデータに部分的なアクセスができるため、よりスケールします。