<?xml version="1.0" encoding="US-ASCII"?>
<!-- This template is for creating an Internet Draft using xml2rfc,
    which is available here: http://xml.resource.org. -->
<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [
<!-- One method to get references from the online citation libraries.
There has to be one entity for each item to be referenced.
An alternate method (rfc include) is described in the references. -->

<!ENTITY RFC6330 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6330.xml">
<!ENTITY RFC2119 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml">
<!ENTITY RFC5226 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5226.xml">
<!ENTITY RFC8682 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.8682.xml">
]>

<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?>
<!-- used by XSLT processors -->
<!-- For a complete list and description of processing instructions (PIs),
    please see http://xml.resource.org/authoring/README.html. -->
<!-- Below are generally applicable Processing Instructions (PIs) that most I-Ds might want to use.
    (Here they are set differently than their defaults in xml2rfc v1.32) -->
<?rfc strict="yes" ?>
<!-- give errors regarding ID-nits and DTD validation -->
<!-- control the table of contents (ToC) -->
<?rfc toc="yes"?>
<!-- generate a ToC -->
<?rfc tocdepth="4"?>
<!-- the number of levels of subsections in ToC. default: 3 -->
<!-- control references -->
<?rfc symrefs="yes"?>
<!-- use symbolic references tags, i.e, [RFC2119] instead of [1] -->
<?rfc sortrefs="yes" ?>
<!-- sort the reference entries alphabetically -->
<!-- control vertical white space
    (using these PIs as follows is recommended by the RFC Editor) -->
<?rfc compact="yes" ?>
<!-- do not start each main section on a new page -->
<?rfc subcompact="no" ?>
<!-- keep one blank line between list items -->
<!-- end of list of popular I-D processing instructions -->
<rfc category="info" docName="draft-irtf-nwcrg-bats-00" ipr="trust200902">
  <!-- category values: std, bcp, info, exp, and historic
       ipr values: trust200902, noModificationTrust200902, noDerivativesTrust200902,
       or pre5378Trust200902
       you can add the attributes updates="NNNN" and obsoletes="NNNN"
       they will automatically be output with "(if approved)" -->

  <!-- ***** FRONT MATTER ***** -->

  <front>
    <!-- The abbreviated title is used in the page header - it is only necessary if the
         full title is longer than 39 characters -->

    <title abbrev="BATS Code">BATS Coding Scheme for Multi-hop Data Transport</title>

    <!-- add 'role="editor"' below for the editors if appropriate -->
    <!-- Another author who claims to be an editor -->

    <author fullname="Shenghao Yang" initials="S" surname="Yang">
      <organization>CUHK(SZ)</organization>
      <address>
        <postal>
          <street></street>
          <!-- Reorder these if your country does things differently -->
          <city>Shenzhen</city>
          <region>Guangdong</region>
          <code></code>
          <country>China</country>
        </postal>
        <phone>+86 755 8427 3827</phone>
        <email>shyang@cuhk.edu.cn</email>
        <!-- uri and facsimile elements may also be added -->
      </address>
    </author>

    <author fullname="Xuan Huang" initials="X" surname="Huang">
      <organization>CUHK</organization>
      <address>
        <postal>
          <street></street>
          <!-- Reorder these if your country does things differently -->
          <city>Hong Kong</city>
          <region>Hong Kong SAR</region>
          <code></code>
          <country>China</country>
        </postal>
        <phone>+852 3943 8375</phone>
        <email>1155136647@link.cuhk.edu.hk</email>
        <!-- uri and facsimile elements may also be added -->
      </address>
    </author>

    <author fullname="Raymond W. Yeung" initials="R.W." surname="Yeung">
      <organization>CUHK</organization>
      <address>
        <postal>
          <street></street>
          <!-- Reorder these if your country does things differently -->
          <city>Hong Kong</city>
          <region>Hong Kong SAR</region>
          <code></code>
          <country>China</country>
        </postal>
        <phone>+852 3943 8375</phone>
        <email>whyeung@ie.cuhk.edu.hk</email>
        <!-- uri and facsimile elements may also be added -->
      </address>
    </author>

    <author fullname="John K. Zao" initials="J.K." surname="Zao">
      <!--organization>National Chiao Tung University</organization-->
      <organization>NCTU</organization>
      <address>
        <postal>
          <street></street>
          <!-- Reorder these if your country does things differently -->
          <city>Hsinchu</city>
          <region>Taiwan</region>
          <code></code>
          <country>China</country>
        </postal>
        <phone></phone>
        <email>jkzao@ieee.org</email>
        <!-- uri and facsimile elements may also be added -->
      </address>
    </author>

    <date year="2021" month="February" day="21" />

    <!-- If the month and year are both specified and are the current ones, xml2rfc will fill
         in the current day for you. If only the current year is specified, xml2rfc will fill
         in the current day and month for you. If the year is not the current one, it is
         necessary to specify at least a month (xml2rfc assumes day="1" if not specified for the
         purpose of calculating the expiry date).  With drafts it is normally sufficient to
         specify just the year. -->

    <!-- Meta-data Declarations -->

    <area>General</area>
    <workgroup>Internet Engineering Task Force</workgroup>

    <!-- WG name at the upperleft corner of the doc,
         IETF is fine for individual submissions.
         If this element is not present, the default is "Network Working Group",
         which is used by the RFC Editor as a nod to the history of the IETF. -->

    <keyword>BATS code</keyword>
    <keyword>multi-hop</keyword>

    <!-- Keywords will be incorporated into HTML output
         files in a meta tag but they have no effect on text or nroff
         output. If you submit your draft to the RFC Editor, the
         keywords will be used for the search engine. -->

    <abstract>
      <t>BATS code is a class of efficient linear network coding scheme with a matrix generalization of fountain codes as the outer code, and batch-based linear network coding as the inner code. This document describes a baseline BATS coding scheme for communication through multi-hop networks, and discusses the related research issues towards a more sophisticated BATS coding scheme.</t>
    </abstract>
  </front>

  <middle>
    <section title="Introduction">
      <t>This document specifies a baseline <xref target="Yang14">BATS code</xref> scheme for data delivery in multi-hop networks, and discusses the related research issues towards a more sophisticated scheme. The BATS code described here includes an outer code and an inner code. The outer code is a matrix generalization of fountain codes (see also the RaptorQ code described in <xref target="RFC6330">RFC&nbsp;6330</xref>), which inherits the advantages of reliability and efficiency and possesses the extra desirable property of being network coding compatible. The inner code, also called recoding, is formed by linear network coding for combating packet loss, improving the multicast efficiency, etc. A detailed design and analysis of BATS codes are provided in the <xref target="Yang17">BATS monograph</xref>.</t>

      <t>A BATS coding scheme can be applied in multi-hop networks formed by wireless communication links, which are inherently unreliable due to interference. Existing transport  protocols like TCP use end-to-end retransmission, while network protocols such as IP might enable store-and-forward at the relays, so that packet loss would accumulate along the way.</t>

      <t>A BATS coding scheme can be used for various data delivery applications like file transmission, video streaming over wireless multi-hop networks, etc. Different from traditional forward error correcting (FEC) schemes that are applied either hop-by-hop or end-to-end, the BATS coding scheme combines the end-to-end coding (the outer code) with certain hop-by-hop coding (the inner code), and hence can potentially achieve better performance.</t>
      
      <t>The baseline coding scheme described here considers a network with multiple communication flows. For each flow, the source node encodes the data for transmission separately. Inside the network, however, it is possible to mix the packets from different flows for recoding. In this document, we describe a simple case where recoding is performed within each flow. Note that the same encoding/decoding scheme described here can be used with different recoding schemes as long as they follow the principle as we illustrate in this document.</t>
      
      <t>The purpose of the baseline BATS coding scheme is twofold. First, it provides researchers and engineers a starting point for developing network communication applications/protocols based on BATS codes. Second, it helps to make the research issues more clear towards a sophisticated BATS code based network protocol. Important research directions include the security issues, congestion control and routing algorithms for BATS codes, etc. </t>
      <section title="Requirements Language">
        <t>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
        "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
        document are to be interpreted as described in <xref
        target="RFC2119">RFC 2119</xref>.</t>
      </section> <!--Requirements Language-->
    </section> <!--Introduction-->

    <section anchor="procedures" title="Procedures">
      <section title="Introduction">
        <t>
          A BATS coding scheme includes an outer code encoder (also called encoder), an inner code encoder (also called recoder) and a decoder.
          The BATS coding scheme can be used for a single data flow that includes a single source and one or multiple destinations. Thus there exists only one encoder with multiple recoders and decoders.
          The BATS coding scheme described in this document can be used by a Data Delivery Protocol (DDP) with the following procedures.
          <list>
            <t>Outer Code Encoding at a source node which has the data for transmission:
            <list style="symbols">
              <t>The DDP provides the data to be delivered and the related information to the BATS encoder.</t>
              <t>The BATS encoder generates a sequence of batches, each consisting of a set of coded packets and the
              information pertaining to the batch.</t>
            </list>
            </t>
	    <t>The batches generated at the source node are further recoded before transmitting:
	    <list style="symbols">
              <t>A BATS recoder generates recoded packets of a batch.</t>
              <t>The DDP forms and transmits the DDP packets using the batches and the corresponding batch information.</t>
            </list>
	    </t>
            <t>Recoding at an intermediate node that does not need the data:
            <list style="symbols">
              <t>The DDP extracts the batches and the corresponding batch information from its received DDP packets.</t>
              <t>A BATS recoder generates recoded packets of a batch.</t>
              <t>The DDP forms and transmits DDP packets using the recoded packets and the corresponding batch information.</t>
            </list>
            </t>

            <t>Decoding at a destination node that needs the data:
            <list style="symbols">
              <t>The DDP extracts the batches and the corresponding batch information from its received DDP packets.</t>
              <t>A BATS  decoder tries to recover the transmitted data using the received batches.</t>
              <t>The DDP sends the decoded data to the application that needs the data.</t>
            </list>
            </t>
          </list>
        </t>
      </section> <!--Introduction-->

      <section title="Data Delivery Procedures">
	    <t>Suppose that the DDP has F octets of data for transmission. We describe the procedures of one BATS session for transmitting the F octets. There is a limit on F of a single BATS session. If the total data size exceeds the limit, the data needs to be transmitted using multiple BATS sessions. The limit on F of a single BATS session depends on the MTU (maximum transmission unit) of the network, which MUST be known by the DDP. Specifically, F is no more than (MTU-10)*2^16-1 octets.</t>
	    <section title="Source Node Data Partitioning and Padding">
	      <t>
            The DDP first determines the following parameters:
	        <list style="symbols">
              <t>Batch size (M): the number of coded packets in a batch.</t>
              <t>Recoding field size (q): the number of elements in the finite field for recoding. q is 2 or 2^8</t>
              <t>BATS payload size (TO): the number of payload octets in a BATS packet, including the coded data and the coefficient vector.</t>
            </list>
	      </t>
	      <t> Based on the above parameters, the parameters T, O and K are calculated as follows:
	      <list style="symbols">
	        <t>O: the number of octets of a coefficient vector, calculated as O = ceil(M*log2(q)/8).</t>
	        <t>T: the number of data octets of a BATS packet, calculated as T = TO - O.</t>
	        <t>K: number of source packets, calculated as K = floor(F/T)+1. </t>
	      </list>
	      </t>
	      <t>
	        The data MUST be padded to have T*K octets, which will be partitioned into K source packets b[0], ..., b[K-1], each of T octets.
	        In our padding scheme, b[0], ..., b[K-2] are filled with data octets, and b[K-1] is filled with the remaining data octets and padding octets.
	      Let P = K*T-F denote the number of padding octets. We use b[K-1, 0], ..., b[K-1, T-P-1] to denote the T-P source octets and b[K-1, T-P], ..., b[K-1, T-1] to denote the P padding octets in b[K-1], respectively. The padding process is shown in <xref target="data_padding" />.</t>
      <figure anchor="data_padding" title="Data Padding Process">
      	  <artwork><![CDATA[
      Z = T - P
      Let bl be the last source packet b[K-1]
      for i = 1, 2, ... do
        if Z + i > T - 1 do
            bl[Z...T-1] = i
            break
        bl[Z...Z+i-1] = i
        Z = Z + i
             ]]></artwork>
      	</figure>
	  </section> <!--Padding-->

	  <section title="Source Node Outer Code Encoding Procedure">
        <t>
          The DDP provides the BATS encoder with the following information:
          <list style="symbols">
            <t>Batch size (M): the number of coded packets in a batch.</t>
            <t>Recoding field size (q): the number of elements in the finite field for recoding.</t>
            <t>MAX_DEG: the maximum degree, i.e., the largest index of DD.</t>
            <t>The degree distribution (DD), which is an unsigned integer array of size MAX_DEG+1.</t>
            <t>A sequence of batch IDs (j, j = 0, 1, ...).</t>
            <t>Number of source packets (K).</t>
            <t>Packet size (T): the number of octets in a source packet.</t>
            <t>The source packets (b[i], i = 0, 1, ..., K-1).</t>
          </list>
          Using this information, the (outer code) encoder generates a batch for each batch ID. For the batch ID j, the encoder returns to the DDP a batch that contains
          <list style="symbols">
            <t>a sparse degree d[j], and</t>
            <t>M coded packets (x[j,i], i =0, 1, ..., M-1), each containing TO octets.</t>
          </list>
        </t>
        <t>
          The DDP will use the batches to form DDP packets to be transmitted to other network nodes towards the destination nodes. The DDP MUST deliver with each coded packet its
          <list style="symbols">
            <t>d: sparse degree</t>
            <t>BID: batch ID</t>
          </list>
          The DDP MUST deliver the following information to each recoder:
          <list style="symbols">
            <t>M: batch size M</t>
            <t>q: recoding field size</t>
          </list>
          The DDP MUST deliver the following information to each decoder:
          <list style="symbols">
            <t>M: batch size</t>
            <t>q: recoding field size</t>
            <!--t>F: the data size</t-->
            <t>K: the number of source packets</t>
            <t>T: the number of octets in a source packet</t>
          </list>
          The BID is used by both recoders and decoders. The BATS payload size TO MUST be known by all the nodes.
        </t>
	    <t>
	      The DDP will also include some necessary extra information in the packet header so that the network nodes can identify different BATS sessions, and different end-to-end communication flows. However, such specifications are beyond the scope of this document.
	    </t>
      </section>

      <section title="Recoding Procedures">
        <t>
	      Both the source node and the intermediate nodes perform recoding on the batches before transmission. At the source node, the recoder receives the batches from the outer code encoding procedure. At an intermediate node, the DDP receives the DDP packets from the other network nodes, and should be able to extract coded packets and the corresponding batch information from these packets.
        </t>
        <t>
          The DDP provides the recoder with the following information:
          <list style="symbols">
            <t>the batch size M,</t>
            <t>the recoding field size q,</t>
            <t>a number of received coded packets of the same batch, each containing TO octets, and</t>
	        <t>link statistics, e.g., packet loss rates.</t>
          </list>
        </t>
        <t>
	      For a received batch, the recoder determines a positive integer Mr, the number of recoded packets to be transmitted for the batch. The recoder uses the information provided by the DDP to generate Mr recoded packets, each containing TO octets. The DDP uses the Mr recoded packets to form the DDP packets for transmitting.
        </t>
      </section>

      <section title="Destination Node Procedures">
        <t>
          A destination node needs the data transmitted by the source node. At the destination node, the DDP receives DDP packets from the other network nodes, and should be able to extract coded packets and the corresponding batch information from these packets.
        </t>

        <t>
          The DDP provides the decoder with the following information:
          <list style="symbols">
            <!-- <t>F: number of octets in the data,</t> -->
            <t>M: batch size,</t>
            <t>q: recoding field size,</t>
            <t>K: the number of source packets</t>
            <t>T: the number of octets of a source packet</t>
            <t>A sequence of batches, each of which is formed by a number of coded packets belonging to the same batch, with their corresponding batch IDs and degrees.</t>
          </list>
        </t>
	    <t> The decoder uses this information to decode the K source packets. If successful, the decoder returns the recovered K source packets to the DDP, which will use the K source packets to form the F octets of data. The recommended depadding process is shown as follows:</t>
        <figure anchor="data_depadding" title="Data Depadding Process">
      	  <artwork><![CDATA[
    // this procedure returns the number P of padding octets
    // at the end of b[K-1]
    Let bl be the last decoded source packet b[K-1]
    PL = bl[T-1]
    if PL == 1 do
        return P = 1
    WI = T - 1
    while bl[WI] == PL do
        WI = WI - 1
    return P = (1 + bl[WI]) * bl[WI] / 2 + T - WI - 1
             ]]></artwork>
      	</figure>
      </section>
    </section>

    <section title="Recommendation for the Parameters">
      <t>
        The recommendation for the parameters M and q is shown as follows:
        <list style="symbols">
          <t>When q=2, M=16,32,64</t>
          <t>When q=256, M=8,16,32,64</t>
        </list>
        It is RECOMMENDED that K is at least 128. However, the encoder/decoder SHALL support an arbitrary positive integer value of K less than 2^16.
      </t>
    </section> <!--Recommendation for the Parameters-->

    <section title="Example DDP Packet Format">
      <t>A DDP can form a DDP packet with a header (5 octets), a footer (3 octets) and a payload (TO octets). A DDP packet has 8+TO octets in total.</t>
      <section title="Packet Header">
        <t> The BATS packet header has 40 bits (5 octets) and includes fields Packet_Count, Mq, Batch_ID, and Degree.</t>
        <figure anchor="packet_header" title="BATS packet header format.">
            <artwork><![CDATA[
    0                   1                   2                   3
    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    |           Packet_Count        |  Mq   |       Batch_ID        |
    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    |     Degree    |
    +-+-+-+-+-+-+-+-+
            ]]></artwork>
        </figure>
        <t><list style="symbols">
          <t>Packet_Count: 16-bit unsigned integer, specifying the number K of packets of the BATS session.</t>
          <t>Mq: 4-bit unsigned integer to specify the value of M and q as <xref target="Mq_value" />.</t>
          <t>Batch_ID: 12-bit unsigned integer, specifying the batch ID BID of the batch to which the packet belongs.</t>
          <t>Degree: 8-bit unsigned integer, specifying the batch degree d of the batch to which the packet belongs.</t>
        </list></t>
	    <texttable anchor="Mq_value" title="Values of Mq field">
	      <ttcol>Mq</ttcol><ttcol>M</ttcol><ttcol>q</ttcol><ttcol>O</ttcol>
	      <!--c>0000</c><c>1</c><c>-</c><c>-</c-->
	      <c>0010</c><c>16</c><c>2</c><c>2</c>
	      <c>0100</c><c>32</c><c>2</c><c>4</c>
	      <c>0110</c><c>64</c><c>2</c><c>8</c>
	      <c>0001</c><c>8</c><c>256</c><c>8</c>
	      <c>0011</c><c>16</c><c>256</c><c>16</c>
	      <c>0101</c><c>32</c><c>256</c><c>32</c>
	      <c>0111</c><c>64</c><c>256</c><c>64</c>
	    </texttable>
	  </section>

      <section title="Packet Payload">
        <figure anchor="packet_payload" title="BATS packet payload format."><artwork><![CDATA[
                  O                         T
      +-----------------------+-------------------------------+
      |   coefficient vector  |          coded data           |
      +-----------------------+-------------------------------+
          ]]></artwork>
        </figure>
        <t>
          The payload has TO octets, where the first O octets contain the coefficient vector and the remaining T octets contain the coded data.
          Information in both fields MAY be encoded in JSON (ASCII) or protobuf (binary) formats.
          <list style="symbols">
            <t>coefficient vector: O octets. The range of the value of O is in <xref target="Mq_value" />.</t>
            <t>coded data: T octets. T is at most MTU - 10, where 10 is the total of the header and footer length plus the minimum value of O.</t>
        </list></t>
      </section>

      <section title="Packet Footer">
        <figure anchor="packet_footer" title="BATS packet footer format."><artwork><![CDATA[
      0                   1                   2
      0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3
      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
      |            signature          |  parity check |
      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
      ]]></artwork>
        </figure>
        <t>
          The footer has three octets.
          <list style="symbols">
            <t>signature: 2 octets. A signature of the individual packet to prevent pollution attacks.</t>
            <t>parity check: 1 octet. A parity check field used to verify the correctness of the packet.</t>
          </list>
        </t>
      </section>
    </section>
  </section>

  <?rfc needLines="8" ?>

  <section anchor="specification" title="BATS Code Specification">
      <section anchor="common" title="Common Parts">
        <t>
          The T octets of a source packet are treated as a column vector of T elements in GF(256). Linear algebra and matrix operations over finite fields are assumed in this section.
        </t>

        <t> Suppose that a pseudorandom number generator Rand() which generates an unsigned integer of 32 bits is shared by both encoding and decoding. The pseudorandom generator can be initialized by Rand_Init(S) with seed S. When S is not provided, the pseudorandom generator is initialized arbitrarily. One example of such a pseudorandom generator is defined in <xref target="RFC8682">RFC&nbsp;8682</xref>.</t>

	    <t>A function called BatchSampler is used in both encoding and decoding. The function takes two integers j and d as input, and generates an array idx of d integers and a d x M matrix G.  The function first initializes the pseudorandom generator with j, then samples d distinct integers from 0 to K-1 as idx, and samples d*M integers from 0 to 255 as G. See the pseudocode in <xref target="batch_sampler" />. </t>
	    <figure anchor="batch_sampler" title="Batch Sampler Function">
	      <artwork><![CDATA[
function BatchSampler(j,d)
    // initialize the pseudorandom generator by seed j.
    Rand_Init(j)
    // sample d distinct integers between 0 and K-1.
    for k = 0, ..., d-1 do
        r = Rand() % K
        while r already exists in idx do
            r = Rand() % K
        idx[k] = r

    // sample d x M matrix
    for r = 0, ..., d-1 do
        for c = 0,...,M-1 do
            G[r,c] = Rand() % 256

    return idx, G
        ]]></artwork>
	    </figure>
      </section>

      <section anchor="encoder" title="Outer Code Encoder">
	    <t>Define a function called DegreeSampler that returns an integer d using the degree distribution DD. We expect that the empirical distribution of the returned d converges to DD(d) when d &#60; K. One design of DegreeSampler is illustrated in <xref target="degree_sampler" />.
	    </t>
	    <figure anchor="degree_sampler" title="Degree Sampler Function">
	      <artwork><![CDATA[
function DegreeSampler(j, DD)
    Let CDF be an array
    CDF[0] = 0
    for i = 1, ..., MAX_DEG do
        CDF[i] = CDF[i-1] + DD[i]
    Rand_Init()
    r = Rand() % CDF[MAX_DEG]
    for d = 1, ..., MAX_DEG do
        if r < CDF[d] do
            return min(d,K)
    return min(MAX_DEG,K)
       ]]></artwork>
	    </figure>
        <t>
          Let b[0], b[1], ..., b[K-1] be the K source packets. A batch with BID j is generated using the following steps.
	      <list style="symbols">
	        <t>Obtain a degree d by calling DegreeSampler with input j.
            </t>
            <t>Obtain idx and G[j] by calling BatchSampler with input j and d.
            </t>
            <t>Let B[j] = (b[idx[0]], b[idx[1]], ..., b[idx[d-1]]). Form the batch X[j] = B[j]*G[j], whose dimension is T x M.
            </t>
            <t>Form the TO x M matrix Xr[j], where the first O rows of Xr[j] form the M x M identity matrix I with entries in GF(q), and the last T rows of Xr[j] is X[j].
            </t>
	      </list>
        </t>

	    <t>See the pseudocode of the batch generating process in <xref target="gen_batch" />.</t>

	    <figure anchor="gen_batch" title="Batch Generation Function">
	      <artwork><![CDATA[
function GenBatch(j)
    d = DegreeSampler(j)
    (idx, G) = BatchSampler(j,d)
    B = (b[idx[0]], b[idx[1]], ..., b[idx[d-1]])
    X = B * G
    Xr = [I_M; X]
    return Xr
       ]]></artwork>
	    </figure>
      </section>
      <section anchor="recoder" title="Inner Code Encoder (Recoder)">
        <t>
          The inner code comprises (random) linear network coding applied on the coded packets belonging to the same batch.
          At a particular network node, recoded packets are generated by (random) linear combinations of the received coded packets of a batch.
          The recoded packets have the same BID, sparse degree and coded packet length.
        </t>

	    <t>
	      The number Mr of recoded packets for a batch is decided first by the recoder. Mr can be set as M.
	      When the link statistics are known, the recoder can try to obtain the link packet loss rate e for the link to transmit the recoded batch, and set Mr to be (1+e)M.
	    </t>

        <t>
          Suppose that coded packets xr[i], i = 0, 1, ..., r-1, which have the same BID j, have been received at an intermediate node. Using the recommended packet format, it can be verified whether the corresponding packet headers of these coded packets are the same. Then a recoded packet can be generated by one of the following two approaches:
          <list style="symbols">
            <t>forwarding: when receiving xr[i], directly use xr[i] as a recoded packet.</t>
            <t>linear combination recoding: (randomly) choose a sequence of coefficients c[i], i = 0, 1, ..., r-1 from GF(q). Generate c[0]xr[0]+c[1]xr[1]+...+c[r-1]xr[r-1] as a recoded packet.</t>
          </list>
          A recoder can combine these two approaches to generate recoded packets. For example, the recoder will output xr[i], i = 0, 1, ..., r-1 as r systematic recoded packets and generate Mr-r recoded packets using linear combinations of randomly chosen coefficients.
        </t>
      </section>
      <section anchor="bp" title="Belief Propagation Decoder">
        <t> The decoder receives a sequence of batches Yr[j], j = 0, 1, ..., n-1, each of which is a TO-row matrix over GF(256). The degree d[j] of batch j is also known. Let Y[j] be the submatrix of the last T rows of Yr[j]. When q = 256, let H[j] be the first M rows of Yr[j]; when q = 2, let H[j] be the matrix over GF(256) formed by embedding each bit in the first M/8 rows of Yr[j] into GF(256).</t>

	    <t> By calling BatchSampler with input j and d[j], we obtain idx[j] and G[j]. According to the encoding and recoding processes described in <xref target="encoder" /> and <xref target="recoder" />, we have the system of linear equations Y[j] = B[j]G[j]H[j] for each received batch with ID j, where B[j] = (b[idx[j,0]], b[idx[j,1]], ..., b[idx[j,d-1]]) is unknown.
	    </t>

        <t> We describe a belief propagation (BP) decoder that can efficiently solve the source packets when a sufficient number of batches have been received. A batch j is said to be decodable if rank(G[j]H[j]) = d[j] (i.e., the system of linear equations Y[j] = B[j]G[j]H[j] with B[j] as the variable matrix has a unique solution). The BP decoding algorithm has multiple iterations. Each iteration is formed by the following steps:
        <list style="symbols">
          <t> Decoding step: Find a batch j that is decodable. Solve the corresponding system of linear equations Y[j] = B[j]G[j]H[j] and decode B[j].</t>
          <t> Substitution step: Substitute the decoded source packets into undecodable batches. Suppose that a decoded source packet b[k] is used in generating an undecodable Y[j]. The substitution involves 1) removing the entry in idx[j] corresponding to k, 2) removing the row in G[j] corresponding to b[k], and 3) reducing d[j] by 1. </t>
        </list>
        The BP decoder repeats the above steps until no batches are decodable during the decoding step.
        </t>
      </section>
    </section>


    <section anchor="research" title="Research Issues">
      <t>The baseline BATS coding scheme described in <xref target="procedures" /> and <xref target="specification" /> needs various refinements and complements towards a more sophisticated network communication application. Various related research issues are discussed in this section, but the security related issues are left to <xref target="Security" />. </t>
      <section anchor="coding" title="Coding Design Issues">
	<t>The BATS code specification in <xref target="specification" /> has nearly optimal throughput when the number of source packets K is sufficiently large. But when K is small, the degree sampler function in <xref target="degree_sampler" /> and the BatchSampler function in <xref target="batch_sampler" /> based on a pseudorandom generator may not sample all the source packets evenly, so that some of the source packets are not well protected. One approach to solve this issue is to generate a deterministic degree sequence when the number of batches is relatively small, and design a special pseudorandom generator that has a good sampling performance when K is small.</t>
	<t>The belief propagation decoder in <xref target="bp" /> guarantees the recovery of a given fraction of the source packets. To recover all the source packets, a precode can be applied to the source packets to generate a fraction of redundant packets before applying the outer code encoding. Moreover, when the belief propagation decoder stops, it is possible to continue with inactivation decoding, where certain source packets are treated as inactive so that a similar belief propagation process can be resumed. The reader is referred to <xref target="RFC6330">RFC&nbsp;6330</xref> for the design of a precode with a good inactivation decoding performance. </t>
	<t>There are research issues related to recoding discussed in <xref target="recoder" />. One question is how many recoded packets to generate for each batch. Though it is asymptotically optimal when using the same number of recoded packets for all batches, it has been shown that transmitting a different number of recoded packets for different batches can improve the recoding efficiency. The intuition is that for a batch with a lower rank, a smaller number of recoded packets need to be transmitted. This kind of recoding scheme is called <xref target="Yin19">adaptive recoding</xref>.</t>
	<t>Packet loss in network communication is usually bursty, which may harm the recoding performance. One way to resolve this issue is to transmit the packets of different batches in a mixed order, which is also called <xref target="Yin20">batch interleaving</xref>. How to efficiently interleave batches without increasing too much end-to-end latency is a research issue.</t>
	<t> Though we only focus on the BATS coding scheme with one source node and one destination node, a BATS coding scheme can be used for multiple source and destination nodes. To benefit from multiple source nodes, we would need different source nodes to generate statistically independent batches. For communicating the same data to multiple destination nodes, which is also called multicast, it is well-known that <xref target="Li03">linear network coding</xref> achieves the multicast capacity. BATS codes can benefit from network coding due to their inner code, but how to efficiently implement multicast needs further research.</t>
      </section>
      <section anchor="protocol" title="Protocol Design Issues">
	    <t>The baseline scheme in this document focuses on reliable communication. There are other issues to be considered towards designing a fully functional DDP based on a BATS coding scheme. Here we discuss some network management issues that are closely related to a BATS coding scheme: routing, congestion control and media access control.</t>
	    <t>The outer code of a BATS code can be regarded as a channel code for the channel induced by the inner code, and hence the network management algorithms should try to maximize the capacity of the channel induced by the inner code. A <xref target="Dong20">network utility maximization problem</xref> for BATS coding can be applied to study routing, congestion control and media access control jointly. Compared with the network utility maximization for the Internet, there are two major differences. First, the network flow rate is not measured by the rate of the raw packets. Instead, a rank based measurement induced by the inner code is applied for BATS coding schemes. Second, due to recoding, the raw packet rate of a flow may not be the same for different links, i.e., no flow conservation for BATS coding schemes. These differences affect both the objective and the constraints of the utility maximization problem. </t>
	    <t>Practical congestion control, routing and media access control algorithms for BATS coding schemes deserve more research efforts. Due to the recoding operation, congestion control cannot be only performed end-to-end. The rate of transmitting batches can be controlled end-to-end, but the number of recoded packets generated for a batch must be controlled at the intermediate nodes, which introduces new research issues for congestion control. For routing, the BATS coding scheme is flexible for implementing multi-path data transmission, and different batches can be transmitted on a different path between a source node and a destination node. Under the scenario of BATS coding schemes, media access control can have some different considerations: Retransmission is not necessary, and a reasonably high packet loss rate can be tolerated. </t>
      </section>
      <section anchor="application" title="Application Related Issues">
	    <t>There are more research issues pertaining to different applications. The reliable communication technique provided by BATS codes can be used for a broad range of network communication scenarios. In general, a BATS coding scheme is suitable for data delivery in networks with multiple hops and unreliable links.</t>
	    <t>One class of typical application scenario is <xref target="Toh02">wireless mesh and ad hoc networks</xref>, including vehicular networks, wireless sensor networks, smart-lamppost networks, etc. These networks are characterized by a large number of network devices connected wirelessly with each other without a centralized network infrastructure. A BATS coding scheme is suitable for high data load delivery in such networks without the requirement that the point-to-point/one-hop communication is highly reliable. Therefore, employing a BATS coding scheme can provide more freedom for media access control, including power control so that the overall network throughput can be improved. </t>
	    <t>Another typical application scenario of BATS coding schemes is <xref target="Sprea19">underwater acoustic networks</xref>, where the propagation delay of acoustic waves underwater can be as long as several seconds. Due to the long delay, feedback based mechanisms become inefficient. Moreover, point-to-point/one-hop underwater acoustic communication (for both the forward and reverse directions) is highly unreliable. Due to these reasons, traditional networking techniques developed for radio and wireline networks cannot be directly applied to underwater networks. As a BATS coding scheme does not rely on the feedback for reliable communication and can tolerate highly unreliable links, it makes a good candidate for developing data delivery protocols for underwater acoustic networks.</t>
	    <t>Last but not least, due to its capability of performing multi-source, multi-destination communications, a BATS coding scheme can be applied in various content distribution scenarios. For example, a BATS coding scheme can be a candidate for the erasure code used in the <xref target="Byers20">liquid data networking framework</xref> of CCN (content centric networking), and provides the extra <xref target="Zhang16">benefit of network coding</xref>. </t>
      </section>
    </section>

    <!-- This PI places the pagebreak correctly (before the section title) in the text output. -->

    <?rfc needLines="8" ?>


    <!--section anchor="Acknowledgements" title="Acknowledgements">
        <t></t>
        </section-->

    <!-- Possibly a 'Contributors' section ... -->

    <section anchor="IANA" title="IANA Considerations">
      <t>This memo includes no request to IANA.</t>
    </section>

    <section anchor="Security" title="Security Considerations">
      <t>
        Subsuming both Random Linear Network Codes (RLNC) and fountain codes, BATS codes naturally inherit both their desirable capability of offering confidentiality protection as well as their vulnerability towards pollution attacks.
      </t>
      <section title="Provision of Confidentiality Protection">
        <t>
          Since the transported messages are linearly combined with random coefficients at each recoding node, it is statistically impossible to recover the individual messages by capturing the coded messages at any one or small number of nodes. As long as the coding matrices of the transported messages cannot be fully recovered, any attempt of decoding any particular symbol of the transported messages is equivalent to random guessing <xref target="Bhattad05"></xref>.
        </t>
        <t>
          The threat towards confidentiality, however, also exists in the form of eavesdropping on the initial encoding process, which takes place at the encoding nodes. In these nodes, the transported data are presented in plain text and can be read along their transfer paths. Hence, information isolation between the encoding process and all other user processes running on the node must be assured.
        </t>
        <t>
          In addition, the authenticity and trustworthiness of the encoding, recoding and decoding program running on all the nodes must be attested by a trusted authority. Such a measure is also necessary in countering pollution attacks.
        </t>
      </section>
      <section title="Countermeasures against Pollution Attacks">
        <t>Like all network codes, BATS codes are vulnerable under pollution attacks. In these attacks, one or more compromised coding node(s) can pollute the coded messages by injecting forged messages into the coding network and thus prevent the receivers from 
   recovering the transported data correctly.
	    </t>
        <t>The research community has long been investigating the use of various signature schemes (including homomorphic signatures) to identify the forged messages and stall the attacks (see <xref target="Zhao07"></xref>, <xref target="Yu08"></xref>, <xref target="Agrawal09"></xref>). However, these countermeasures are regarded as being too computationally expensive to be employed in broadband communications. Hence, a system-level approach based on <xref target="TC-Wikipedia">Trusted Computing</xref> is proposed as a practical alternative to protect BATS codes against pollution attacks. This Trusted Computing based protection consists of the following countermeasures:
        <list style="numbers">
          <t> Attestation and Validation of all BATS encoding, recoding and decoding
       nodes in the network.  Remote attestation and repetitive validation of
       the identity and capability of these nodes based on valid public key certificates with proper authorization MUST be a pre-requisite for admitting these nodes to a network and permitting them to remain on that network.</t>
          <t> Attestation of all encoding, recoding and decoding programs used in the coding nodes. All programs used to perform the BATS encoding, recoding and decoding processes MUST be remotely attested before they are permitted to run on any of the coding nodes. Reloading or alteration of programs MUST NOT be permitted during an encoding session. Programs MUST be attested or validated again when they are executed in new execution environments instantiated even in the same node.</t>
          <t> Origin Authentication of all coded messages. Network level security protocols such as IPsec, or Peer Authentication over session-based communication using transport level security protocols such as TLS/DTLS, MUST be employed in order to provide Message Origin or Communication Peer Authentication to every coded message sent through the coding network.</t>
        </list>
        </t>
      </section>
    </section>
  </middle>

    <!--  *****BACK MATTER ***** -->

    <back>
      <!-- References split into informative and normative -->
      <!-- There are 2 ways to insert reference entries from the citation libraries:
         1. define an ENTITY at the top, and use "ampersand character"RFC2629; here (as shown)
         2. simply use a PI "less than character"?rfc include="reference.RFC.2119.xml"?> here
            (for I-Ds: include="reference.I-D.narten-iana-considerations-rfc2434bis.xml")

         Both are cited textually in the same manner: by using xref elements.
         If you use the PI option, xml2rfc will, by default, try to find included files in the same
         directory as the including file. You can also define the XML_LIBRARY environment variable
         with a value containing a set of directories to search.  These can be either in the local
         filing system or remote ones accessed by http (http://domain/dir/... ).-->

        <references title="Normative References">
          <!--?rfc include="http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml"?-->
          &RFC2119;
	      &RFC8682;
        </references>

        <references title="Informative References">
          <!-- Here we use entities that we defined at the beginning. -->
          &RFC6330;

          <!-- A reference written by an organization not a person. -->
          <reference anchor="Yang14">
            <front>
              <title>Batched Sparse Codes</title>
              <author initials="S." surname="Yang" fullname="Shenghao Yang">
                 </author>
              <author initials="R.W." surname="Yeung" fullname="Raymond W. Yeung">
              </author>
              <date year="2014" />
            </front>
            <seriesInfo name="IEEE Transactions on Information Theory" value="60(9), 5322-5346" />
          </reference>

          <reference anchor="Yang17">
            <front>
              <title>BATS Codes: Theory and Practice</title>
              <author initials="S." surname="Yang" fullname="Shenghao Yang">
              </author>
              <author initials="R.W." surname="Yeung" fullname="Raymond W. Yeung">
              </author>
              <date year="2017"/>
            </front>
            <seriesInfo name="Morgan &#38; Claypool Publishers" value=""/>
          </reference>

	      <reference anchor="Yin19">
            <front>
              <title>A Unified Adaptive Recoding Framework for Batched Network Coding</title>
		      <author initials="H.H.F." surname="Yin" fullname="Hoover H.F. Yin"></author>
		      <author initials="B." surname="Tang" fullname="Bin Tang"></author>
		      <author initials="K.H." surname="Ng" fullname="Ka Hei Ng"></author>
		      <author initials="S." surname="Yang" fullname="Shenghao Yang">
              </author>
              <author initials="X." surname="Wang" fullname="Xishi Wang">
              </author>
		      <author initials="Q." surname="Zhou" fullname="Qiaoqiao Zhou">
              </author>
              <date year="2019"/>
            </front>
            <seriesInfo name="ISIT" value=""/>
          </reference>

	      <reference anchor="Yin20">
            <front>
              <title>A Protocol Design Paradigm for Batched Sparse Codes</title>
		      <author initials="H.H.F." surname="Yin" fullname="Hoover H.F. Yin"></author>
		      <author initials="R.W." surname="Yeung" fullname="Raymond W. Yeung">
              </author>
		      <author initials="S." surname="Yang" fullname="Shenghao Yang">
              </author>
              <date year="2020"/>
            </front>
            <seriesInfo name="Entropy" value=""/>
          </reference>

	      <reference anchor="Dong20">
            <front>
              <title>Network Utility Maximization for BATS Code enabled Multihop Wireless Networks</title>
		      <author initials="Y." surname="Dong" fullname="Yanyan Dong"></author>
		      <author initials="S." surname="Jin" fullname="Sheng Jin">
              </author>
		      <author initials="S." surname="Yang" fullname="Shenghao Yang">
              </author>
		      <author initials="H.H.F." surname="Yin" fullname="Hoover H.F. Yin"></author>
              <date year="2020"/>
            </front>
            <seriesInfo name="ICC" value=""/>
          </reference>

	      <reference anchor="Bhattad05">
            <front>
              <title>Weakly Secure Network Coding</title>
              <author initials="K." surname="Bhattad" fullname="Kapil Bhattad"></author>
              <author initials="K.R." surname="Narayanan" fullname="Krishna R. Narayanan"></author>
              <date year="2007" />
            </front>
            <seriesInfo name="ISIT" value=""/>
          </reference>

          <reference anchor="Zhao07">
            <front>
              <title>Signatures for content distribution with network coding</title>
              <author initials="F." surname="Zhao" fullname="Fang Zhao"></author>
              <author initials="T." surname="Kalker"></author>
              <author initials="M." surname="Medard"></author>
              <author initials="K.J." surname="Han"></author>
              <date year="2007" />
            </front>
            <seriesInfo name="ISIT" value=""/>
          </reference>

	      <reference anchor="Byers20">
            <front>
              <title>Liquid Data Networking</title>
              <author initials="J.W." surname="Byers" fullname="John W. Byers"></author>
              <author initials="M." surname="Luby" fullname="Michael Luby"></author>
              <date year="2020" />
            </front>
            <seriesInfo name="ICN" value=""/>
          </reference>

          <reference anchor="Yu08">
            <front>
              <title>An Efficient Signature-Based Scheme for Securing Network Coding Against Pollution Attacks</title>
              <author initials="Z." surname="Yu"></author>
              <author initials="Y." surname="Wei"></author>
              <author initials="B." surname="Ramkumar"></author>
              <author initials="Y." surname="Guan"></author>
              <date year="2008" />
            </front>
            <seriesInfo name="INFOCOM" value=""/>
          </reference>

          <reference anchor="Agrawal09">
            <front>
              <title>Homomorphic MACs: MAC-based integrity for network coding</title>
              <author initials="S." surname="Agrawal" fullname="Shweta Agrawal"></author>
              <author initials="D." surname="Boneh" fullname="Dan Boneh"></author>
              <date year="2009" />
            </front>
            <seriesInfo name="International Conference on Applied Cryptography and Network Security" value="" />
          </reference>

	      <reference anchor="TC-Wikipedia">
	        <front>
	      	  <title>Trusted Computing</title>
	      	  <author></author>
	      	  <date year="" />
	        </front>	
	        <seriesInfo name="Wikipedia" value="https://en.wikipedia.org/wiki/Trusted_Computing"/>
	      </reference>

	      <reference anchor="Sprea19">
            <front>
              <title>BATS Coding for Underwater Acoustic Communication Networks</title>
              <author initials="N." surname="Sprea" fullname="Nicol&ograve; Sprea"></author>
              <author initials="M." surname="Bashir" fullname="Murwan Bashir"></author>
		      <author initials="D." surname="Truhachev" fullname="Dmitri Truhachev"></author>
              <author initials="K.V." surname="Srinivas"></author>
		      <author initials="C." surname="Schlegel"></author>
              <author initials="C." surname="Sacchi" fullname="Claudio Sacchi"></author>
              <date year="2019" />
            </front>
            <seriesInfo name="OCEANS" value="" />
          </reference>

	      <reference anchor="Toh02">
	        <front>
	      	  <title>Ad Hoc Mobile Wireless Networks</title>
	      	  <author initials="C.K." surname="Toh" fullname="Chai Keong Toh"></author>
	      	  <date year="2002" />
	        </front>	
	        <seriesInfo name="Prentice Hall Publishers" value=""/>
	      </reference>
	      
	      <reference anchor="Zhang16">
	        <front>
	      	  <title>Combining CCN with network coding: An architectural perspective</title>
	      	  <author initials="G." surname="Zhang" fullname="Guoqiang Zhang"></author>
		      <author initials="Z." surname="Xu" fullname="Ziqu Xu"></author>
	      	  <date year="2016" />
	        </front>	
	        <seriesInfo name="Computer Networks" value=""/>
	      </reference>

          <reference anchor="Li03">
             <front>
	      	  <title>Linear Network Coding</title>
	      	  <author initials="S.-Y.R." surname="Li"></author>
              <author initials="R.W." surname="Yeung" fullname="Raymond W. Yeung"></author>
		      <author initials="N." surname="Cai" fullname="Ning Cai"></author>
	      	  <date year="2003" />
	        </front>	
	        <seriesInfo name="IEEE Transactions on Information Theory" value=""/>
          </reference>

        </references>

        <section anchor="app-additional" title="Additional Stuff">
            <t></t>
        </section>

        <!-- Change Log  -->
    </back>
</rfc>
