<?xml version="1.0" encoding="US-ASCII"?>
<!-- This template is for creating an Internet Draft using xml2rfc,
which is available here: http://xml.resource.org. -->
<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [
<!-- One method to get references from the online citation libraries.
There has to be one entity for each item to be referenced. 
An alternate method (rfc include) is described in the references. -->

<!ENTITY RFC2119 SYSTEM "http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.2119.xml">
<!ENTITY RFC3550 SYSTEM "http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.3550.xml">
<!ENTITY RFC4585 SYSTEM "http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.4585.xml">
<!ENTITY RFC5124 SYSTEM "http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.5124.xml">
<!ENTITY RFC5481 SYSTEM "http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.5481.xml">
<!ENTITY RFC6817 SYSTEM "http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.6817.xml">
<!ENTITY RFC1323 SYSTEM "http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.1323.xml">
<!ENTITY I-D.welzl-rmcat-coupled-cc SYSTEM
"http://xml2rfc.ietf.org/public/rfc/bibxml3/reference.I-D.draft-welzl-rmcat-coupled-cc-04.xml">
]>
<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?>
<!-- used by XSLT processors -->
<!-- For a complete list and description of processing instructions (PIs), 
please see http://xml.resource.org/authoring/README.html. -->
<!-- Below are generally applicable Processing Instructions (PIs) that most I-Ds might want to use.
(Here they are set differently than their defaults in xml2rfc v1.32) -->
<?rfc strict="yes" ?>
<!-- give errors regarding ID-nits and DTD validation -->
<!-- control the table of contents (ToC) -->
<?rfc toc="yes"?>
<!-- generate a ToC -->
<?rfc tocdepth="4"?>
<!-- the number of levels of subsections in ToC. default: 3 -->
<!-- control references -->
<?rfc symrefs="yes"?>
<!-- use symbolic references tags, i.e, [RFC2119] instead of [1] -->
<?rfc sortrefs="yes" ?>
<!-- sort the reference entries alphabetically -->
<!-- control vertical white space 
(using these PIs as follows is recommended by the RFC Editor) -->
<?rfc compact="yes" ?>
<!-- do not start each main section on a new page -->
<?rfc subcompact="no" ?>
<!-- keep one blank line between list items -->
<!-- end of list of popular I-D processing instructions -->
<rfc category="exp" docName="draft-ietf-rmcat-sbd-02" ipr="trust200902">
  <!-- category values: std, bcp, info, exp, and historic
       ipr values: full3667, noModification3667, noDerivatives3667
       you can add the attributes updates="NNNN" and obsoletes="NNNN" 
       they will automatically be output with "(if approved)" -->
  
  <!-- ***** FRONT MATTER ***** -->
  
  <front>
    <!-- The abbreviated title is used in the page header - it is only necessary if the 
         full title is longer than 39 characters -->
    
    <title abbrev="SBD for CCC with RTP Media">
      Shared Bottleneck Detection for Coupled Congestion Control for
      RTP Media
    </title>
    
    <!-- add 'role="editor"' below for the editors if appropriate -->
    
    <!-- Another author who claims to be an editor -->
    
    <author fullname="David Hayes" initials="D.A." role="editor"
            surname="Hayes">
      <organization>University of Oslo</organization>
      <address>
        <postal>
          <street>PO Box 1080 Blindern</street>
          <city>Oslo</city>
          <region></region>
          <code>N-0316</code>
          <country>Norway</country>
        </postal>
        <phone>+47 2284 5566</phone>
        <email>davihay@ifi.uio.no</email>
      </address>
    </author>
   <author fullname="Simone Ferlin" initials="S."
            surname="Ferlin">
      <organization>Simula Research Laboratory</organization>
      <address>
        <postal>
          <street>P.O.Box 134</street>
          <city>Lysaker</city>
          <region></region>
          <code>1325</code>
          <country>Norway</country>
        </postal>
        <phone>+47 4072 0702</phone>
        <email>ferlin@simula.no</email>
      </address>
    </author>
    <author fullname="Michael Welzl" initials="M."
            surname="Welzl">
      <organization>University of Oslo</organization>
      <address>
        <postal>
          <street>PO Box 1080 Blindern</street>
          <city>Oslo</city>
          <region></region>
          <code>N-0316</code>
          <country>Norway</country>
        </postal>
        <phone>+47 2285 2420</phone>
        <email>michawe@ifi.uio.no</email>
      </address>
    </author>
    <author fullname="Kristian Hiorth" initials="K."
            surname="Hiorth">
      <organization>University of Oslo</organization>
      <address>
        <postal>
          <street>PO Box 1080 Blindern</street>
          <city>Oslo</city>
          <region></region>
          <code>N-0316</code>
          <country>Norway</country>
        </postal>
        <email>kristahi@ifi.uio.no</email>
      </address>
    </author>
    
    <date month="October" year="2015" />

    <!-- If the month and year are both specified and are the current ones, xml2rfc will fill 
         in the current day for you. If only the current year is specified, xml2rfc will fill 
	 in the current day and month for you. If the year is not the current one, it is 
	 necessary to specify at least a month (xml2rfc assumes day="1" if not specified for the 
	 purpose of calculating the expiry date).  With drafts it is normally sufficient to 
	 specify just the year. -->

    <!-- Meta-data Declarations -->

    <area>General</area>

    <workgroup>RTP Media Congestion Avoidance Techniques</workgroup>

    <!-- WG name at the upperleft corner of the doc,
         IETF is fine for individual submissions.  
	 If this element is not present, the default is "Network Working Group",
         which is used by the RFC Editor as a nod to the history of the IETF. -->

    <keyword>SBD</keyword>

    <!-- Keywords will be incorporated into HTML output
         files in a meta tag but they have no effect on text or nroff
         output. If you submit your draft to the RFC Editor, the
         keywords will be used for the search engine. -->

    <abstract>
      <t>This document describes a mechanism to detect whether
      end-to-end data flows
      share a common bottleneck. It relies on summary statistics that are calculated by
      a data receiver based on continuous measurements and regularly fed to a grouping algorithm that
      runs wherever the knowledge is needed. This mechanism complements the coupled congestion
      control mechanism in draft-welzl-rmcat-coupled-cc.</t>
    </abstract>
  </front>

  <middle>
 
    <section title="Introduction">
      <t>In the Internet, it is not normally known if flows (e.g., TCP connections or UDP data streams)
      traverse the same bottlenecks. Even flows that have the same sender and receiver may take
      different paths and share a bottleneck or not. Flows that share a bottleneck link usually
      compete with one another for their share of the capacity. This competition has the potential
      to increase packet loss and delays. This is especially relevant for interactive applications
      that communicate simultaneously with multiple peers (such as multi-party video). For RTP
      media applications such as RTCWEB, <xref target="I-D.welzl-rmcat-coupled-cc"></xref> describes
      a scheme that combines
      the congestion controllers of flows in order to honor their priorities and avoid unnecessary
      packet loss as well as delay.
      This mechanism relies on some form of Shared Bottleneck Detection (SBD); here, a
      measurement-based SBD approach is described.</t>


	<section title="The signals">
	  <t>The current Internet is unable to explicitly inform
	  endpoints as to which flows share bottlenecks, so endpoints
	  need to infer this from whatever information is available to
	  them. The mechanism described here currently utilises packet
	  loss and packet delay, but is not restricted to these.</t>

	  <section title="Packet Loss">
	    <t>Packet loss is often a relatively rare
	    signal. Therefore, on its own it is of limited use for
	    SBD; however, it is a valuable supplementary measure when
	    it is more prevalent.</t>
	  </section>

	  <section title="Packet Delay">
	    <t>End-to-end delay measurements include noise from every
	    device along the path in addition to the delay
	    perturbation at the bottleneck device. The noise is
	    often significantly increased if the round-trip time is used. The
	    cleanest signal is obtained by using One-Way-Delay
	    (OWD).</t>

	    <t>Measuring absolute OWD is difficult since it requires
	    both the sender and receiver clocks to be
	    synchronised. However, since the statistics being
	    collected are relative to the mean OWD, a relative OWD
	    measurement is sufficient. Clock skew is not usually
	    significant over the time intervals used by this SBD
	    mechanism (see <xref target="RFC6817"/> A.2 for a
	    discussion on clock skew and OWD measurements). However,
	    in circumstances where it is significant, <xref
	    target="clockskew"/> outlines a way of adjusting the
	    calculations to cater for it.</t>

	    <t>Each packet arriving at the bottleneck buffer may
	    experience very different queue lengths, and therefore different
	    waiting times. A single OWD sample does not, therefore,
	    characterize the path well. However,
	    multiple OWD measurements do reflect the distribution of
	    delays experienced at the bottleneck.</t>
	  </section>
	
	  <section title="Path Lag">
	    <t>Flows that share a common bottleneck may traverse
	    different paths, and these paths will often have different
	    base delays. This makes it difficult to correlate changes
	    in delay or loss. This technique uses the long term shape
	    of the delay distribution as a base for comparison to
	    counter this.</t>
	  </section>
	</section>
    </section>
      
    <section anchor="Definitions" title="Definitions">
      <t>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
      "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
      document are to be interpreted as described in <xref
      target="RFC2119">RFC 2119</xref>.</t>

      <t>Acronyms used in this document:
      <list hangIndent="10" style="hanging">
	<t hangText="   OWD --"> One Way Delay</t>
	<t hangText="   MAD --"> Mean Absolute Deviation</t>
	<t hangText="   RTT --"> Round Trip Time</t>
	<t hangText="   SBD --"> Shared Bottleneck Detection</t>
      </list></t>

      <t>Conventions used in this document:
      <list hangIndent="18" style="hanging">
	<t hangText="   T       --"> the base time interval over which measurements
	are made.</t>
	<t hangText="   N       --"> the number of base time, T, intervals
	used in some calculations.</t>
	<t hangText="   sum_T(...) --">  summation of all the
	measurements of the variable in parentheses taken over the
	interval T</t>
	<t hangText="   sum(...)   --">  summation of terms of the variable in parentheses</t>
	<t hangText="   sum_N(...) --">  summation of N terms of the variable in parentheses</t>
	<t hangText="   sum_NT(...) --"> summation of all
	measurements taken over the interval N*T</t>
	<t hangText="   E_T(...) --">  the expectation or mean of the
	measurements of the variable in parentheses over T</t>
	<t hangText="   E_N(...) --">  the expectation or mean of the last N values of
	the variable in parentheses</t>
	<t hangText="   E_M(...) --">  the expectation or mean of the last M values of
	the variable in parentheses, where M &lt;= N.</t>
	<t hangText="   max_T(...) --"> the maximum recorded measurement
	of the variable in parentheses taken over the interval T</t>
	<t hangText="   min_T(...) --"> the minimum recorded measurement
	of the variable in parentheses taken over the interval T</t>
	<t hangText="   num_T(...) --"> the count of measurements of the
	variable in parentheses taken in the interval T</t>
	<t hangText="   num_VM(...) --"> the count of valid values of the
	variable in parentheses given M records</t>
	<t hangText="   PB --"> a boolean variable indicating the
	particular flow was identified transiting a bottleneck in
	the previous interval T (i.e. Previously Bottleneck)</t>
	<t hangText="   skew_est --"> a measure of skewness in an OWD
	distribution.</t>
	<t hangText="  skew_base_T --"> a variable used as an
	intermediate step in calculating skew_est.</t>
	<t hangText="   var_est --"> a measure of variability in
	OWD measurements.</t>
	<t hangText="  var_base_T --"> a variable used as an
	intermediate step in calculating var_est.</t>
	<t hangText="   freq_est --"> a measure of low frequency
	oscillation in the OWD measurements.</t>
	<t hangText="   p_l, p_f, p_mad, c_s, c_h, p_s, p_d, p_v --"> various
	thresholds used in the mechanism</t>
	<t hangText="   M and F --"> number of values related to N</t>
      </list>.<vspace blankLines="100" /></t>

      <section anchor="parameters" title="Parameters and their Effect">
	<t><list hangIndent="8" style="hanging">
	  <t hangText="T"> T should be long enough so that there are
	  enough packets received during T for a useful estimate of
	  short term mean OWD and variation statistics. Making T too
	  large can limit the efficacy of freq_est. It will
	  also increase the response time of the mechanism. Making T
	  too small will make the metrics noisier.</t>

	  <t hangText="N &amp; M"> N should be large enough to provide a
	  stable estimate of oscillations in OWD. Usually M=N, though
	  having M&lt;N may be beneficial in certain circumstances.
	  M*T needs to be long enough to provide stable estimates of
	  skewness and MAD.</t>

	  <t hangText="F"> F determines the number of intervals
	  over which statistics are considered to be equally
	  weighted. When F=M recent and older measurements are
	  considered equal. Making F&lt;M can increase the
	  responsiveness of the SBD mechanism. If F is too small,
	  statistics will be too noisy.</t>

	  <t hangText="c_s"> c_s is the threshold in skew_est used for
	  determining whether a flow is transiting a bottleneck or
	  not. It should be slightly negative so that a very lightly
	  loaded path does not give a false indication. Setting c_s
	  more negative makes the SBD mechanism less sensitive to
	  transient and slight bottlenecks.</t>

	  <t hangText="c_h"> c_h adds hysteresis to the bottleneck
	  determination. It should be large enough to avoid constant
	  switching in the determination, but low enough to ensure
	  that grouping is not attempted when there is no bottleneck
	  and the delay and loss signals cannot be relied upon.</t>

	  <t hangText="p_v"> p_v determines the sensitivity of freq_est
	  to noise. Making it smaller will yield higher but noisier
	  values for freq_est. Making it too large will render it
	  ineffective for determining groups.</t>

	  <t hangText="p_*"> Flows are separated when the
	  skew_est|var_est|freq_est measure is greater than
	  p_s|p_f|p_d|p_mad. Adjusting these is a compromise
	  between false grouping of flows that do not share a
	  bottleneck and false splitting of flows that do. Making them
	  larger can help if the measures are very noisy, but reducing
	  the noise in the statistical measures by adjusting T and N|M
	  may be a better solution.</t>
	  
	</list></t>
	
      </section>

      <section anchor="recommended-parameters" title="Recommended Parameter Values">
	  <t>Reference <xref target="Hayes-LCN14"/> uses T=350ms,
	  N=50, p_l=0.1. The other parameters have been tightened to
	  reflect minor enhancements to the algorithm outlined in
	  <xref target="removingnoise"/>:
	   c_s=-0.01, p_f=p_d=0.1, p_s=0.15,
	   p_mad=0.1, p_v=0.7. M=30, F=20, and c_h = 0.3 are additional
	   parameters defined in the document.
	   These are values that seem to work well over a wide range of practical
	  Internet conditions.</t>
      </section>
      
    </section>


    <section anchor="Mechanism" title="Mechanism">
      <t>The mechanism described in this document is based on the
      observation that the distributions of delay measurements of
      packets that traverse a
      common bottleneck have similar shape characteristics. These
      shape characteristics are described using 3 key summary
      statistics:
      <list style="hanging">
          <t>variability (estimate var_est, see <xref target="sbd_mad"/>)</t>
          <t>skewness (estimate skew_est, see <xref target="sbd_skewest"/>)</t>
	  <t>oscillation (estimate freq_est, see <xref target="sbd_freqest"/>)</t>
      </list>
      with packet loss (estimate pkt_loss, see <xref
      target="sbd_pktloss"/>) used as a supplementary statistic.</t>
      
      <t>Summary statistics help to address both the noise and the
      path lag problems by describing the general shape over a
      relatively long period of time. Each summary statistic portrays
      a "view" of the bottleneck link characteristics, and when used
      together, they provide a robust discrimination for grouping flows.
      They can be signalled from a receiver, which measures the OWD
      and calculates the summary statistics, to a sender, which is the
      entity that is transmitting the media stream. An RTP Media
      device may be both a sender and a receiver. SBD can be performed
      at either a sender or a receiver or both.</t>

           <figure align="center" anchor="sbd-topo">
         <!-- <preamble>Preamble text - can be omitted or empty.</preamble> -->

        <artwork align="left"><![CDATA[
                               +----+
                               | H2 |
                               +----+
                                  |
                                  | L2
                                  |
                      +----+  L1  |  L3  +----+
                      | H1 |------|------| H3 |
                      +----+             +----+
            ]]></artwork>

        <postamble>A network with 3 hosts (H1, H2, H3) and 3 links (L1, L2, L3).</postamble>
      </figure>

      <t>In <xref target="sbd-topo" />, there are two possible cases
      for shared bottleneck detection: a sender-based and a
      receiver-based case.
      <list style="numbers">
	<t>Sender-based: consider a situation where host H1 sends media
	  streams to hosts H2 and H3, and L1 is a shared bottleneck.
	  H2 and H3 measure the OWD and calculate summary statistics,
	  which they send to H1 every T. H1, having this knowledge,
	  can determine the shared bottleneck and accordingly control
	  the send rates.</t>

	  <t>Receiver-based: consider that H2 is also sending media to
	  H3, and L3 is a shared bottleneck. If H3 sends summary
	  statistics to H1 and H2, neither H1 nor H2 alone obtains
	  enough knowledge to detect this shared bottleneck; H3 can
	  however determine it by combining the summary statistics
	  related to H1 and H2, respectively. This case is applicable
	  when send rates are controlled by the receiver; then, the
	  signal from H3 to the senders contains the sending rate.</t>
      </list></t>

      <t>A discussion of the required signalling for the receiver-based
      case is beyond the scope of this document. For the sender-based
      case, the messages and their data format will be defined here in
      future versions of this document.</t>

      <t>We envisage the following exchange during initialisation:
      <list style="symbols">
	<t>An initialization message from the sender to the receiver
	will contain the following information:
	<list style="symbols">
	  <t> A protocol identifier (SBD=01). This is to future proof
	  the message exchange so that potential advances in SBD
	  technology can be easily deployed. All following
	  initialisation elements relate to the mechanism outlined in
	  this document which will have the identifier SBD=01.</t>
	  
	  <t> A list of which key metrics should be collected and
	  relayed back to the sender out of a possibly extensible set
	  (pkt_loss, var_est, skew_est, freq_est).  The grouping
	  algorithm described in this document requires all four of
	  these metrics, and receivers MUST be able to provide them, but
	  future algorithms may be able to exploit other metrics
	  (e.g. metrics based on explicit network signals).</t>
	  
	  <t> The values of T, N, M, and the necessary resolution and
	  precision of the relayed statistics.</t>
	</list> </t>        
	
	<t>A response message from the receiver acknowledges this message
	with a list of key metrics it supports (subset of the sender's list)
	and is able to relay back to the sender.</t>

	<t>This initialisation exchange may be repeated to finalize the
	agreed metrics should they not all be supported by all
	receivers.</t>
      </list></t>


     

      <section anchor="sbd-metrics" title="Key metrics and their calculation">

	<t>Measurements are calculated over a base interval, T, and
	summarized over N or M such intervals.  All summary statistics
	can be calculated incrementally.
	</t>

	<section title="Mean delay">

	  <t>The mean delay is not a useful signal for comparisons
	  between flows since flows may traverse quite different paths
	  and clocks will not necessarily be synchronized. However, it
	  is a base measure for the 3 summary statistics. The mean
	  delay, E_T(OWD), is the average one way delay measured over
	  T.</t>
	  
	  <t>To facilitate the other calculations, the last N
	  E_T(OWD) values will need to be stored in a cyclic buffer
	  along with the moving
	  average of E_T(OWD):
	  <list style="hanging">
	    <t>mean_delay = E_M(E_T(OWD)) = sum_M(E_T(OWD)) / M</t>
	  </list>
	  where M &lt;= N. Setting M to be less than N
	  allows the mechanism to be more responsive to changes, but
	  potentially at the expense of a higher error rate (see <xref
	  target="improvingresponse"/> for a discussion on improving
	  the responsiveness of the mechanism.) </t>
	</section>
	
	<section anchor="sbd_skewest" title="Skewness Estimate">
	  <t>Skewness is difficult to calculate efficiently and
	  accurately. Ideally it should be calculated over the entire
	  period (M * T) from the mean OWD over that period. However this
	  would require storing every delay measurement over the
	  period. Instead, an estimate is made over M * T based on a
	  calculation every T using the previous T's calculation of
	  mean_delay.</t>

	  <t>The base for the skewness calculation is estimated using a counter initialised
	  every T. It increments for one way delay samples (OWD) below the mean and
	  decrements for OWD above the mean. So for each OWD sample:
	  <list style="hanging">
	    <t>if (OWD &lt;  mean_delay) skew_base_T++</t>
	    <t>if (OWD > mean_delay) skew_base_T--</t>
	  </list></t>
	  <t>The mean_delay does not include the mean of the
	  current T interval to enable it to be calculated iteratively.</t>
	  <t>skew_est = sum_MT(skew_base_T)/num_MT(OWD)
	  <list style="hanging">
	    <t> where skew_est is a number between -1 and 1</t>
	  </list></t>
	  
	  <t>Note: Care must be taken when implementing the
	  comparisons to ensure that rounding does not bias
	  skew_est. It is important that the mean is calculated
	  with a higher precision than the samples.
	</t>
	</section>

	<section anchor="sbd_mad" title="Variability Estimate">
	  <t>Mean Absolute Deviation (MAD) delay is a robust
	  variability measure that copes well with different send
	  rates. It can be implemented in an online manner as follows:
	  <list style="hanging">
	    <t> var_base_T = sum_T(|OWD - E_T(OWD)|)
	    <list style="hanging"><t>where
	    <list style="hanging">
	      <t>|x| is the absolute value of x</t>
	      <t>E_T(OWD) is the mean OWD calculated in the previous
	      T</t>
	    </list></t>
	    </list></t>
	    <t>var_est = MAD_MT = sum_MT(var_base_T)/num_MT(OWD) </t>
	  </list></t>
	  
	  <t>For calculation of freq_est p_v=0.7</t>

	  <t>For the grouping threshold p_mad=0.1</t>
	</section>

	<section anchor="sbd_freqest" title="Oscillation Estimate">
	  <t>An estimate of the low frequency oscillation of the delay
	  signal is calculated by counting and normalising the significant mean,
	  E_T(OWD), crossings of mean_delay:
	  <list style="hanging">
	    <t>freq_est = number_of_crossings / N
	    <list style="hanging">
	      <t> where we define a significant mean
	      crossing as a crossing that extends p_v * var_est from
	      mean_delay. In our experiments we have found that p_v =
	      0.7 is a good value.</t>
	    </list></t>
	  </list>
	  Freq_est is a number between 0 and 1. Freq_est
	  can be approximated incrementally as follows:
	  <list style="hanging">
	    <t> With each new calculation of E_T(OWD) a decision is
	    made as to whether this value of E_T(OWD) significantly
	    crosses the current long term mean, mean_delay, with respect to
	    the previous significant mean crossing.</t>
	    
	    <t>A cyclic buffer, last_N_crossings, records a 1 if there is a significant
	    mean crossing, otherwise a 0.</t>

	    <t>The counter, number_of_crossings, is incremented when there
	    is a significant mean crossing and decremented when a
	    non-zero value is removed from the last_N_crossings.</t>
	  </list>
	  This approximation of freq_est was not used in <xref
	  target="Hayes-LCN14"/>, which calculated freq_est every T
	  using the current E_N(E_T(OWD)). Our tests show that
	  this approximation of freq_est yields results that are almost
	  identical to when the full calculation is performed every
	  T.</t>

	</section>

	<section anchor="sbd_pktloss" title="Packet loss">
	  <t>The proportion of packets lost over the period NT is used
	  as a supplementary measure:
	  <list style="hanging">
	    <t>pkt_loss = sum_NT(lost packets) / sum_NT(total
	    packets)</t>
	  </list>
	  Note: When pkt_loss is small it is very variable; however,
	  when pkt_loss is high it becomes a stable measure for
	  making grouping decisions.</t>
	</section>
      </section>
      
      
	

      <section title="Flow Grouping">
	<section anchor="flowgrouping" title="Flow Grouping Algorithm">
	  <t>The following grouping algorithm is RECOMMENDED for SBD
	  in the RMCAT context and is sufficient and efficient for small to
	  moderate numbers of flows. For very large numbers of flows
	  (e.g. hundreds), a more complex clustering algorithm may be
	  substituted.</t>

	  <t>Since no single metric is precise enough to group flows
	  (due to noise), the algorithm uses multiple metrics. Each
	  metric offers a different "view" of the bottleneck link
	  characteristics, and used together they enable a more precise
	  grouping of flows than would otherwise be possible.</t>
	  
	  <t>Flows determined to be transiting a bottleneck are
	  successively divided into groups based on freq_est,
	  var_est, skew_est and pkt_loss.</t>
	  
          <t>The first step is to determine which flows are
          transiting a bottleneck. This is important, since if a flow
          is not transiting a bottleneck its delay based metrics will
          not describe the bottleneck, but the "noise" from the rest
          of the path. Skewness, with proportion of packet loss as a
          supplementary measure, is used to do this:
	  <list counter="grouping" style="format %d.">
	    <t>Grouping will be performed on flows that are inferred
	    to be traversing a bottleneck by:
	    <list style="hanging">
	      <t>skew_est &lt; c_s
	      <list style="hanging">
		<t>|| ( skew_est &lt; c_h &amp;
		PB ) || pkt_loss > p_l</t>
	      </list></t>
	    </list></t>
	  </list></t>
	  
	  <t>The parameter c_s controls how sensitive the mechanism is
	  in detecting a bottleneck. C_s = 0.0 was used in  <xref
	  target="Hayes-LCN14"/>. A value of c_s = 0.05 is a little
	  more sensitive, and c_s = -0.05 is a little less
	  sensitive. C_h controls the hysteresis on flows that were
	  grouped as transiting a bottleneck last time. If the test
	  result is TRUE, PB=TRUE, otherwise PB=FALSE.</t>

	  <t>These flows, flows transiting a bottleneck, are then
	  progressively divided into groups based on the freq_est, var_est,
	  and skew_est summary statistics. The process proceeds
	  according to the following steps:
	  <list counter="grouping" style="format %d." >
	    <t>Group flows whose difference in sorted freq_est is less than a
	    threshold:
	    <list style="hanging">
	      <t> diff(freq_est) &lt;  p_f</t>
	    </list></t>
	    <t>Group flows whose difference in sorted E_M(var_est)
	    (highest to lowest) is less than a threshold:
	    <list style="hanging">
	      <t> diff(var_est) &lt;  (p_mad * var_est) </t>
	    </list>The threshold,  (p_mad * var_est), is with respect
	    to the highest value in the difference.</t>
            <t>Group flows whose difference in sorted skew_est is less
            than a threshold:
	    <list style="hanging">
	      <t> diff(skew_est) &lt;  p_s </t>
	    </list></t>
	    <t>When packet loss is high enough to be reliable
	    (pkt_loss &gt; p_l), group flows whose difference is less
	    than a threshold
	      <list style="hanging">
		<t>diff(pkt_loss) &lt;  (p_d * pkt_loss) </t>
		</list>The threshold,  (p_d * pkt_loss), is with respect
	    to the highest value in the difference.</t>
	  </list></t>
	  
	  <t>This procedure involves sorting estimates from highest to
	  lowest. It is simple to implement, and efficient for small
	  numbers of flows (up to 10-20).</t>

	</section>
	<section title="Using the flow group signal">
	  <t>Grouping decisions can be made every T from the second T,
	  however they will not attain their full design accuracy until
	  after the 2*N'th T interval. We recommend that grouping
	  decisions are not made until 2*M T intervals.</t>
	  <t>Network conditions, and even the congestion controllers,
	  can cause bottlenecks to fluctuate. A coupled congestion
	  controller MAY decide only to couple groups that remain
	  stable, say grouped together 90% of the time, depending on
	  its objectives. Recommendations concerning this are beyond
	  the scope of this draft and will be specific to the coupled
	  congestion controller's objectives.</t>
	</section>
      </section>

      <section anchor="removingnoise" title="Removing Noise from the Estimates">
	<t>The following subsections describe small changes to the calculation of
	the key metrics that help remove noise from them. Currently these
	"tweaks" are described separately to keep the main description
	succinct. In future revisions of the draft these enhancements
	may replace the original key metric calculations.</t>
	

	<section anchor="oscillationnoise" title="Oscillation noise">
	  <t>When a path has no bottleneck, var_est will be very small and
	  the recorded significant mean crossings will be the result
	  of path noise. Thus up to N-1 meaningless mean crossings can
	  be a source of error at the point a link becomes a
	  bottleneck and flows traversing it begin to be grouped.</t>
	  
	  <t>To remove this source of noise from freq_est:
	  <list counter="oscn" style="format %d.">
	    <t>Set the current var_base_T = NaN (a value representing
	    an invalid record, i.e. Not a Number) for flows that are
	    deemed to not be transiting a bottleneck by the first
	    skew_est based grouping test (see <xref
	    target="flowgrouping"/>).</t>
	    <t> Then var_est =  sum_MT(var_base_T != NaN) / num_MT(OWD)</t>
	    <t> For freq_est, only record a significant mean crossing
	    if the flow is deemed to be transiting a bottleneck.</t>
	  </list>
	  These three changes can help to remove the non-bottleneck noise
	  from freq_est. </t>
	</section>


	<section anchor="clockskew" title="Clock skew">
	  <t>Generally sender and receiver clock skew will be too
	  small to cause significant errors in the
	  estimators. Skew_est is most sensitive to this type of
	  noise. In circumstances where clock skew is high, basing
	  skew_est only on the previous T's mean provides a noisier
	  but reliable signal.</t>

	  <t>A better method is to estimate the effect the clock
	  skew is having on the summary statistics, and then adjust
	  statistics accordingly. A simple online method of doing this
	  based on min_T(OWD) will be described here in a subsequent
	  version of the draft.</t>

	</section>
	<!--
	<section title="Bias in the variability measure">
	  <t>Var_est can also be biased when measuring varying rate
	  flows. This bias can be corrected as follows.
	  <list style="hanging">
	    <t> PDV_weight = PDV * num_T(OWD)</t>
	    <t> var_est = sum_MT(PDV_weight)/num_MT(OWD)</t>
	  </list></t>
	  <t> This does not require additional state, however, a
	  cyclic buffer storing PDV_weight values will replace the one
	  that stored PDV values.</t>
	</section>
	-->
      </section>

      <section anchor="improvingresponse" title="Reducing lag and Improving
						 Responsiveness">
	<t>Measurement based shared bottleneck detection makes
	decisions in the present based on what has been measured in the
	past. This means that there is always a lag in responding to
	changing conditions. This mechanism is based on summary
	statistics taken over (N*T) seconds. This mechanism can be made more
	responsive to changing conditions by:
	<list style="numbers">
	<t>Reducing N and/or M -- but at
	the expense of having less accurate metrics, and/or</t>
	<t>Exploiting the fact that more recent measurements are more
	valuable than older measurements and weighting them
	accordingly.</t>
	</list></t>


	  <t>Although more recent measurements are more valuable,
	  older measurements are still needed to gain an accurate
	  estimate of the distribution descriptor we are measuring.
	  Unfortunately, the simple exponentially weighted moving
	  average weights drop off too quickly for our requirements
	  and have an infinite tail. A simple linearly declining
	  weighted moving average also does not provide enough weight
	  to the most recent measurements. We propose a piecewise
	  linear distribution of weights, such that the first section
	  (samples 1:F)
	  is flat as in a simple moving average, and the second
	  section (samples F+1:M) has linearly declining weights to the end of the
	  averaging window. We choose integer weights, which allows
	  incremental calculation without introducing rounding
	  errors.</t>


	<section anchor="skewrespimp" title="Improving the response of
	  the skewness estimate">
	  <t>The weighted moving average for skew_est, based on
	  skew_est in <xref
	    target="sbd_skewest"/>, can be calculated as follows:
	  <list style="hanging">
	    <t><list hangIndent="11" style="hanging">
	      <t hangText="skew_est =">((M-F+1)*sum(skew_base_T(1:F))
      	      <list hangIndent="5" style="hanging">
		<t>+ sum([(M-F):1].*skew_base_T(F+1:M))) </t>
	      </list></t>
	      <t>/ ((M-F+1)*sum(numsampT(1:F))
	      <list hangIndent="5" style="hanging">
		<t>+ sum([(M-F):1].*numsampT(F+1:M)))</t>
	      </list></t>
	    </list></t>
	  </list></t>
	  <t>where numsampT is an array of the number of OWD samples
	  in each T (i.e. num_T(OWD)), and numsampT(1) is the most
	  recent; skew_base_T(1) is the most recent calculation of
	  skew_base_T; 1:F refers to the integer values 1 through to F, and
	  [(M-F):1] refers to an array of the integer values (M-F) declining through
	  to 1; and ".*" is the array scalar dot product operator.<vspace blankLines="100" /></t>

	  <t>To calculate this weighted skew_est incrementally:
	  <list  hangIndent="13" style="hanging">
	    <t hangText="Notation:"> F_ - flat portion, D_ - declining
	    portion, W_&nbsp;-&nbsp;weighted component</t>
	    <t hangText="Initialise:">sum_skewbase = 0, F_skewbase=0, W_D_skewbase=0</t>
	    <t>skewbase_hist = buffer of length M initialised to 0</t>
	    <t>numsampT = buffer of length M initialised to 0</t>
	    <t hangText="Steps per iteration:">  </t>
	  </list>
	  <list style="numbers">
	    <t>old_skewbase = skewbase_hist(M)</t>
	    <t>old_numsampT = numsampT(M)</t>
	    <t>cycle(skewbase_hist)</t>
	    <t>cycle(numsampT)</t>
	    <t>numsampT(1) = num_T(OWD)</t>
	    <t>skewbase_hist(1) = skew_base_T</t>
	    <t>F_skewbase = F_skewbase + skew_base_T - skewbase_hist(F+1)</t>
	    <t>W_D_skewbase = W_D_skewbase + (M-F)*skewbase_hist(F+1) &nbsp;&nbsp;-&nbsp;sum_skewbase</t>
            <t>W_D_numsamp =
	    W_D_numsamp + (M-F)*numsampT(F+1) - sum_numsamp &nbsp;&nbsp;+&nbsp;F_numsamp</t>
            <t>F_numsamp =  F_numsamp + numsampT(1) - numsampT(F+1)</t>
	    <t>sum_skewbase = sum_skewbase + skewbase_hist(F+1) - old_skewbase</t>
            <t>sum_numsamp = sum_numsamp + numsampT(1) - old_numsampT</t>
            <t>skew_est = ((M-F+1)*F_skewbase +
	    W_D_skewbase) / &nbsp;&nbsp;((M-F+1)*F_numsamp+W_D_numsamp)</t>
	  </list>
	  Where cycle(....) refers to the operation on a cyclic buffer
	  where the start of the buffer is now the next element in the
	  buffer.</t>
	</section>

	<section anchor="varrespimp" title="Improving the response of
	  the variability estimate">
	  <t>Similarly the weighted moving average for var_est can be
	  calculated as follows:
	  <list style="hanging">
	    <t><list hangIndent="11" style="hanging">
	      <t hangText="var_est =">((M-F+1)*sum(var_base_T(1:F))
      	      <list hangIndent="5" style="hanging">
		<t>+ sum([(M-F):1].*var_base_T(F+1:M))) </t>
	      </list></t>
	      <t>/ ((M-F+1)*sum(numsampT(1:F))
	      <list hangIndent="5" style="hanging">
		<t>+ sum([(M-F):1].*numsampT(F+1:M)))</t>
	      </list></t>
	    </list></t>
	  </list></t>
	  <t>where numsampT is an array of the number of OWD samples
	  in each T (i.e. num_T(OWD)), and numsampT(1) is the most
	  recent; var_base_T(1) is the most recent calculation of
	  var_base_T; 1:F refers to the integer values 1 through to F, and
	  [(M-F):1] refers to an array of the integer values (M-F) declining through
	  to 1; and ".*" is the array scalar dot product operator.
	  When removing oscillation noise (see  <xref target="oscillationnoise"/>) this
	  calculation must be adjusted to allow for invalid var_base_T
	  records.</t>

	  <t>Var_est can be calculated incrementally in the same way
	  as skew_est in <xref target="skewrespimp"/>. However, note
	  that the buffer numsampT is used for both calculations so
	  the operations on it should not be repeated.</t>
	</section>

      </section>

    </section>

      
    <section title="Measuring OWD">

     <t>This section discusses the OWD measurements required for this
     algorithm to detect shared bottlenecks.
     </t>

     <t>The SBD mechanism described in
     this draft relies on differences between OWD measurements to avoid the
     practical problems with measuring absolute OWD (see <xref
     target="Hayes-LCN14"/> section IIIC). Since all summary statistics are
     relative to the mean OWD and sender/receiver clock offsets
     should be approximately constant over the measurement periods, the
     offset is subtracted out in the calculation.</t> 
     
     <section title="Time stamp resolution">
       <t>The SBD mechanism requires timing information precise enough
       to be able to make comparisons. As a rule of thumb, the time
       resolution should be less than one hundredth of a typical path's range
       of delays. In general, the coarser the time resolution, the more
       care needs to be taken to ensure rounding errors do not bias the
       skewness calculation.</t>

       <t>Typical RTP media flows use sub-millisecond timers,
       which should be adequate in most situations.</t>
     </section>

<!--
     <section title="System Timers">
       <t>DavidH: possibly to be included as a guide in a subsequent
       iteration, though probably not the TCP part.</t>

       <t>
	 The following remarks discuss system timers, and may help in
	 some implementation scenarios where available timer
	 granularity could influence where in the system SBD is
	 performed.
       </t>
       
       <t>
	 For an implementation of SBD in kernel-space
	 the system's timestamp resolution is of importance: Earlier systems have the
	 accuracy of the timestamps given by the resolution of the clock mantained by
	 the kernel in jiffy, also called system's kernel tick, given by HZ or hz variables.
	 And the jiffy length is determined by the system's kernel tick. Newer Linux
	 systems have the kernel tick set by default to 250, sometimes also to 1000.
	 Newer FreeBSD systems have the kernel tick set 1000 by default.
	 Thus, yelding to jiffies of 4 or maximum of 1 ms. For the implementer of SBD
	 using the system's time resolution the size of one jiffy is relevant. Larger jiffy
	 values allow for better timer granularity and resolution, however, it comes at
	 the cost of more CPU cycles.
	 In newer systems, other timing source is the high-resolution kernel timer
	 introduced for sub-jiffy granularity. However, this is yet not supported in 
	 all hardware architectures and, thus, it is recommended to the
	 implementer of SBD to first test its support and usability.
       </t>
     
       <t>
	 In particular for applications running on top of TCP, the implementer
	 of SBD could make use of the TCP-TS option, in similar way to LEDBAT, to get
	 OWD sample measurements. However, the TCP timestamp option does not
	 ensure higher resolution because it relies on the kernel jiffy length.
	 For an application sending enough traffic, the TCP-TS is updated at maximum
	 of 1 ms for a system's jiffy length of 1000. Also, the TCP-TS option is limited
	 to two four-byte fields, which also does not guarantee finer than millisecond
	 granularity.
	 
	 Alternatively, reliable OWD samples can be also generate inside the
	 application itself and written into the packet's payload. The implementer of
	 SBD has to decide the necessary granularity given at this level by the amount
	 of data generated and the application's run-time performance.
       </t>
       
       <t>
	 In general, the implementer has to decide which granularity for SBD is necessary
	 depending on its application scenario. If the time granularity of SBD is limited to
	 a jiffy length and, thus, not higher than milliseconds, the OWD of the underlying
	 network path should also not be less than milliseconds. This would cause loss in
	 time precision and the SBD mechanism is unable to detect OWD oscillation, usually
	 represented by changes in the OWD's sample lowest bits. 
       </t>
     </section>
     
-->
     </section>
<!--    
    <section title="Networks and Parameter Settings">
      <t>short discussion as to what parameters might be good, say for
      data centers.</t>
    </section>

-->
 
    <section title="Implementation status">
      <t> The University of Oslo is currently working on an
      implementation of this in the Chromium browser.</t>
    </section>

    <section anchor="Acknowledgements" title="Acknowledgements">
      <t>This work was part-funded by the
      European Community under its Seventh Framework Programme through
      the Reducing Internet Transport Latency (RITE) project
      (ICT-317700). The views expressed are solely those of the
      authors. </t>

    </section>

    <!-- Possibly a 'Contributors' section ... -->

    <section anchor="IANA" title="IANA Considerations">
      <t>This memo includes no request to IANA.</t>

      <!--
      <t>All drafts are required to have an IANA considerations section (see
      <xref target="I-D.narten-iana-considerations-rfc2434bis">the update of
      RFC 2434</xref> for a guide). If the draft does not require IANA to do
      anything, the section contains an explicit statement that this is the
      case (as above). If there are no requirements for IANA, the section will
      be removed during conversion into an RFC by the RFC Editor.</t>
      -->
    </section>

    <section anchor="Security" title="Security Considerations">

      <t>The security considerations of <xref target="RFC3550">RFC
      3550</xref>, <xref target="RFC4585">RFC 4585</xref>, and <xref
      target="RFC5124">RFC 5124</xref> are
      expected to apply.</t>
      
      <t>Non-authenticated RTCP packets carrying shared bottleneck indications and summary
      statistics could allow attackers to alter the bottleneck sharing
      characteristics for private gain or disruption of other parties'
      communication.</t>
    </section>


    <section anchor="ChangeHistory" title="Change history">
      
      <t>Changes made to this document:
      <list hangIndent="18" style="hanging">
	<t hangText=" WG-01->WG-02 :">Removed ambiguity associated
	with the term "congestion". Expanded the description of
	initialisation messages. Removed PDV metric. Added description
	of incremental weighted metric calculations for
	skew_est. Various clarifications based on implementation
	work. Fixed typos and tuned parameters.</t>
	<t hangText="  WG-00->WG-01 :">Moved unbiased skew section to
	replace skew estimate, more robust variability estimator, the
	term variance replaced with variability, clock drift term
	corrected to clock skew,
	revision to clock skew section with a place holder, description
	of parameters.</t>
	<t hangText="  02->WG-00 :">Fixed missing 0.5 in 3.3.2 and
	missing brace in 3.3.3 </t>
	<t hangText="  01->02 :">New section describing improvements
	to the key metric calculations that help to remove noise,
	bias, and reduce lag. Some revisions to the notation to make
	it clearer. Some
	tightening of the thresholds.</t>
	<t hangText="  00->01 :">Revisions to terminology for
	clarity</t>
      </list></t>
    </section>

  </middle>

  <!--  *****BACK MATTER ***** -->

  <back>
    <!-- References split into informative and normative -->

    <!-- There are 2 ways to insert reference entries from the citation libraries:
     1. define an ENTITY at the top, and use "ampersand character"RFC2629; here (as shown)
     2. simply use a PI "less than character"?rfc include="reference.RFC.2119.xml"?> here
        (for I-Ds: include="reference.I-D.narten-iana-considerations-rfc2434bis.xml")

     Both are cited textually in the same manner: by using xref elements.
     If you use the PI option, xml2rfc will, by default, try to find included files in the same
     directory as the including file. You can also define the XML_LIBRARY environment variable
     with a value containing a set of directories to search.  These can be either in the local
     filing system or remote ones accessed by http (http://domain/dir/... ).-->

    <references title="Normative References">

      &RFC2119;

      <!-- the following is the minimum to make xml2rfc happy -->
      <!--
      <reference anchor="min_ref">
        <front>
          <title>Minimal Reference</title>

          <author initials="authInitials" surname="authSurName">
            <organization></organization>
          </author>

          <date year="2006" />
        </front>
      </reference> -->
    </references> 

    <references title="Informative References">
      <!-- Here we use entities that we defined at the beginning. -->


      &RFC3550;

      &RFC4585;

      &RFC5124;

      &RFC5481;

      &RFC6817;

      &I-D.welzl-rmcat-coupled-cc;

      <!-- A reference written by an organization not a person. -->

      <reference anchor="Hayes-LCN14"
                 target="http://heim.ifi.uio.no/davihay/hayes14__pract_passiv_shared_bottl_detec-abstract.html">
        <front>
          <title>Practical Passive Shared Bottleneck Detection using Shape
	Summary Statistics</title>

          <author initials="D. A." surname="Hayes">
            <organization>University of Oslo</organization>
          </author>
          <author initials="S." surname="Ferlin">
            <organization>Simula Research Laboratory</organization>
          </author>
          <author initials="M." surname="Welzl">
            <organization>University of Oslo</organization>
          </author>
          <date year="2014" month="September"/>
        </front>
	  <seriesInfo name="Proc. the IEEE Local Computer Networks (LCN)"
		      value="p150-158"/>
      </reference>

      <reference anchor="ITU-Y1540"
                 target="http://www.itu.int/rec/T-REC-Y.1540-201103-I/en">
        <front>
          <title>Internet Protocol Data Communication Service - IP
          Packet Transfer and Availability Performance
          Parameters</title>

          <author>
            <organization>ITU-T</organization>
          </author>
          <date year="2011" month="March"/>
        </front>
	  <seriesInfo name="Series Y: Global Information Infrastructure, Internet Protocol Aspects and Next-Generation Networks"
		      value=""/>
      </reference>
    </references>

<!-- <reference anchor="DOMINATION"
     target="http://www.example.com/dominator.html"> <front>
     <title>Ultimate Plan for Taking Over the World</title>

          <author>
            <organization>Mad Dominators, Inc.</organization>
          </author>

          <date year="1984" />
        </front>
      </reference> -->


<!--     <section anchor="app-additional" title="Additional Stuff">
      <t>This becomes an Appendix.</t>
    </section> -->

  </back>
</rfc>
