<?xml version="1.0" encoding="utf-8"?>
<!-- 
     draft-rfcxml-general-template-standard-00
  
     This template includes examples of the most commonly used features of RFCXML with comments 
     explaining how to customise them. This template can be quickly turned into an I-D by editing 
     the examples provided. Look for [REPLACE], [REPLACE/DELETE], [CHECK] and edit accordingly.
     Note - 'DELETE' means delete the element or attribute, not just the contents.
     
     Documentation is at https://authors.ietf.org/en/templates-and-schemas
-->
<?xml-model href="rfc7991bis.rnc"?>  <!-- Required for schema validation and schema-aware editing -->
<!-- <?xml-stylesheet type="text/xsl" href="rfc2629.xslt" ?> -->
<!-- This third-party XSLT can be enabled for direct transformations in XML processors, including most browsers -->


<!DOCTYPE rfc [
  <!ENTITY nbsp    "&#160;">
  <!ENTITY zwsp   "&#8203;">
  <!ENTITY nbhy   "&#8209;">
  <!ENTITY wj     "&#8288;">
]>
<!-- If further character entities are required then they should be added to the DOCTYPE above.
     Use of an external entity file is not recommended. -->

<rfc
  xmlns:xi="http://www.w3.org/2001/XInclude"
  category="info"
  docName="draft-chang-agent-token-efficient-00"
  ipr="trust200902"
  obsoletes=""
  updates=""
  submissionType="IETF"
  xml:lang="en"
  version="3">
<!-- [REPLACE] 
       * docName with name of your draft
     [CHECK] 
       * category should be one of std, bcp, info, exp, historic
       * ipr should be one of trust200902, noModificationTrust200902, noDerivativesTrust200902, pre5378Trust200902
       * updates can be an RFC number as NNNN
       * obsoletes can be an RFC number as NNNN 
-->

  <front>
    <title abbrev="Token-efficient Agentic Data Layer">A Token-efficient Data Layer for Agentic Communication</title>
    <!--  [REPLACE/DELETE] abbrev. The abbreviated title is required if the full title is longer than 39 characters -->

    <seriesInfo name="Internet-Draft" value="draft-chang-agent-token-efficient-00"/>
   
    <author fullname="Zeze Chang" initials="Z." surname="Chang">
      <!-- [CHECK]
             * initials should not include an initial for the surname
             * role="editor" is optional -->
    <!-- Can have more than one author -->
      
    <!-- all of the following elements are optional -->
      <organization>Huawei</organization>
      <address>
        <postal>
          <!-- Reorder these if your country does things differently -->
          <street>No. 3, Shangdi Information Road, Haidian District</street>
          <city>Beijing</city>
          <code>100085</code>
          <country>China</country>
          <!-- Uses two letter country code -->
        </postal>        
        <email>changzeze@huawei.com</email>  
        <!-- Can have more than one <email> element -->
      </address>
    </author>

    <author fullname="Jinyang Li" initials="J." surname="Li">
      <!-- [CHECK]
             * initials should not include an initial for the surname
             * role="editor" is optional -->
    <!-- Can have more than one author -->
      
    <!-- all of the following elements are optional -->
      <organization>Huawei</organization>
      <address>
        <postal>
          <!-- Reorder these if your country does things differently -->
          <street>No. 3, Shangdi Information Road, Haidian District</street>
          <city>Beijing</city>
          <code>100085</code>
          <country>China</country>
          <!-- Uses two letter country code -->
        </postal>        
        <email>lijinyang9@huawei.com</email>  
        <!-- Can have more than one <email> element -->
      </address>
    </author>

    <author fullname="Zhen Cao" initials="Z." surname="Cao">
      <!-- [CHECK]
             * initials should not include an initial for the surname
             * role="editor" is optional -->
    <!-- Can have more than one author -->
      
    <!-- all of the following elements are optional -->
      <organization>Huawei</organization>
      <address>
        <postal>
          <!-- Reorder these if your country does things differently -->
          <street>No. 3, Shangdi Information Road, Haidian District</street>
          <city>Beijing</city>
          <code>100085</code>
          <country>China</country>
          <!-- Uses two letter country code -->
        </postal>        
        <email>zhen.cao@huawei.com</email>  
        <!-- Can have more than one <email> element -->
      </address>
    </author>
   
    <date day="5" month="December" year="2025"/>
    <!-- On draft submission:
         * If only the current year is specified, the current day and month will be used.
         * If the month and year are both specified and are the current ones, the current day will
           be used
         * If the year is not the current one, it is necessary to specify at least a month and day="1" will be used.
    -->

    <area>General</area>
    <workgroup>Network Working Group</workgroup>
    <!-- "Internet Engineering Task Force" is fine for individual submissions.  If this element is 
          not present, the default is "Network Working Group", which is used by the RFC Editor as 
          a nod to the history of the RFC Series. -->

    <keyword>agentic communication</keyword>
    <keyword>data layer</keyword>
    <keyword>token efficiency</keyword>
    <!-- [REPLACE/DELETE]. Multiple allowed.  Keywords are incorporated into HTML output files for 
         use by search engines. -->

    <abstract>
      <t>Agentic communication fundamentally differs from traditional machine communication in that its outputs are recursively consumed
        and interpreted by Large Language Models (LLMs). This unique characteristic makes efficient use of the model's limited context a 
        critical requirement. However, current agent communication protocols - such as the Model Context Protocol (MCP) and Agent-to-Agent 
        (A2A) - often suffer from token or context bloat, caused by redundant schema definitions, verbose message structures, and inefficient 
        capability exchanges. As a result, a substantial number of tokens are unnecessarily consumed within the model’s context window. </t>
      <t>To address these issues, this draft defines the Agentic Data Optimization Layer (ADOL), a general and foundational data-layer for
        agent communication. ADOL introduces a set of backward-compatible optimizations, including schema deduplication through JSON references, 
        adaptive inclusion of optional fields, controllable response verbosity, and retrieval-based selection mechanisms. Collectively, these 
        mechanisms reduce token consumption, enhance context efficiency, and provide a scalable foundation for future agent communication frameworks.</t>
    </abstract>
 
  </front>

  <middle>
    
    <section>
      <name>Introduction</name>
      <section>
        <name>Token Bloat Problem in A2A (Agent-Agent Communication)</name>
        <t>The Agent-to-Agent (A2A) protocol defines structured message exchanges among multiple AI agents to support coordination, collaboration, 
          and task delegation. It provides a semantic framework through which agents can advertise their capabilities, exchange intermediate 
          reasoning results, and invoke each other's functions in a standardized manner.</t>
        <t>A2A communication typically uses JSON-based message structures to ensure interoperability across heterogeneous agents. However, as the 
          number and complexity of agents increase, message size and redundancy also grow. Agents often repeatedly exchange static or overlapping 
          information (such as capability descriptions, metadata, or schema definitions) across multiple rounds of interaction. This repetition 
          leads to a phenomenon known as token bloat, where verbose or redundant message components consume excessive tokens in the LLM's context window. </t>
        <t>Token bloat degrades reasoning efficiency, increases latency, and elevates operational cost, particularly in large-scale or multi-agent 
          deployments. These challenges highlight the need for a unified and optimized data layer that reduces redundancy while preserving 
          interoperability and semantic integrity. </t>
      </section>
      <section>
        <name>Token Bloat Problem in MCP (Agent-Tool Communication)</name>
        <t>The Model Context Protocol (MCP) defines a standardized interface for communication 
          between Large Language Models (LLMs) and external tools or data sources. It allows 
          agents to describe, list, and invoke tools in a structured and interoperable manner. 
          MCP follows a client-server architecture consisting of a client (which connects to 
          the MCP server), a server (which hosts tool definitions and logic), and a host 
          (which coordinates the overall interaction). Conceptually, MCP consists of two layers: </t>      
        <t>1. the data layer, which defines JSON-RPC-based primitives such as tools, resources, prompts, and notifications; and </t>
        <t>2. the transport layer, which handles connection establishment, message framing, and authorization. </t>
        <t>As the number of MCP servers/tools increases, the token bloat problem also emerges. Each tool's 
          detailed schema, including optional fields and human-readable descriptions, is transmitted to the 
          model during reasoning. Overlapping schemas, unfiltered tool lists, and verbose outputs also contribute 
          to redundant token usage. Though protocol correctness is unaffected, efficiency, latency, and cost are 
          negatively impacted. These limitations motivate the design of a more efficient data layer. </t>
      </section>
      <section>
        <name>Requirements Language</name>
        <t>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL",
          "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT
          RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be
          interpreted as described in BCP 14 <xref target="RFC2119"/>
          <xref target="RFC8174"/> when, and only when, they appear in
          all capitals, as shown here.</t>
      </section>
      <!-- [CHECK] The 'Requirements Language' section is optional -->

    </section>
    
    <section>
      <name>Problem Statement</name>
      <t>Agentic communication introduces unique efficiency challenges because all exchanged 
        messages are recursively processed by Large Language Models (LLMs). Every transmitted 
        token consumes part of the model's limited context window, making redundancy and verbosity 
        directly detrimental to reasoning efficiency and operational cost.</t>
      <t>Across current agent protocols - such as the <xref target="MCP"/> and <xref target="A2A"/> - four common 
        inefficiencies are observed in the data-layer design: repetitive schemas, excessive optional content, 
        verbose responses, and inefficient capability selection. Together, these issues cause token and context 
        bloat, limiting scalability and responsiveness in multi-agent systems.   </t>
      <section>
        <name>Repetitive Schema Across Tools  </name>
        <t>When the LLM requests a list of tools from the server, the server returns a list of tools that includes 
          the tool schema for all tools. However, tools hosted on the same MCP server often share similar or 
          identical schema components, such as parameters or output schemas. These recurring elements not only 
          introduce unnecessary repetition but also waste tokens when provided to the LLM. We have conducted an experiment 
          to analyze the duplicate content in the schemas of 60 tools within the official GitHub MCP server. According 
          to our results, the repetition rate of some contents within the 60 tool schemas, such as  </t>
        <figure>
        <name>Repetitive Schema Example</name>
        <sourcecode name="" type="json" markers="false">
          <![CDATA[
{
  "repo": {
        "description": "Repository name",
        "type": "string"
      }
}
          ]]>
        </sourcecode>
        <!-- [CHECK] markers="true" means that the rendered file will have <CODE BEGINS> and <CODE ENDS> added -->
      </figure>
      <t>reached 9.84%, indicating that it is necessary to improve the tool schemas to reduce such repetitive fields. </t>
      </section>

      <section>
        <name>Excessive Optional Content  </name>
        <t>The MCP tool schema currently includes a variety of optional fields, such as output 
          schemas and metadata. In many cases, these fields are not essential for successful tool 
          invocation. For instance, the output schema is primarily intended to help the LLM interpret 
          and structure the server’s response. However, when the server’s output is already concise and 
          semantically clear, the value of providing an additional output schema becomes marginal. From 
          the server’s standpoint, these fields are optional; yet, once transmitted, they are fully processed 
          and tokenized by the LLM, contributing directly to prompt length and token overhead. At present, the 
          client has no mechanism to indicate whether such optional content should be included, resulting in 
          unnecessary data exchange and reduced token efficiency. </t>
      </section>

      <section>
        <name>Overly Long Server Responses  </name>
        <t>Under the current MCP architecture, the server is responsible for generating outputs that conform to 
          its declared output schema. However, in many scenarios, this design results in overly verbose responses 
          that exceed the LLM’s actual informational needs. For example, when a weather-reporting tool provides 
          detailed structured data—including temperature, humidity, wind speed, and visibility—an LLM may only 
          require a subset of this information, such as temperature and humidity. Since the server lacks awareness 
          of the LLM’s contextual requirements, it always returns the complete output defined by the schema. Consequently, 
          the LLM must parse and tokenize all fields, even those irrelevant to the current prompt. This behavior 
          introduces unnecessary token consumption and increases computational cost, particularly in use cases where concise responses are sufficient. </t>
      </section>

      <section>
        <name>Inefficient Tool Selection  </name>
        <t>In practice, a service provider typically exposes a single MCP server that aggregates multiple tools under one endpoint. These tools may span heterogeneous functional domains (e.g., translation, data retrieval, summarization), all of which are simultaneously discoverable by the MCP client. While this design simplifies server-side deployment and management, it introduces inefficiency on the client side.
          As the number of available tools grows, agents must evaluate and compare an increasingly large set of tool descriptions to determine the most relevant one. This process not only increases computational overhead but also leads to selection ambiguity, especially when multiple tools exhibit overlapping functionalities or verbose metadata.
          Empirical observations indicate that excessively detailed tool descriptions, while intended to enhance interpretability, can paradoxically increase token consumption during parsing and prompt formation. Moreover, the expanded search space degrades the accuracy and efficiency of tool selection, particularly in real-time or resource-constrained scenarios. </t>
      </section>
    </section>
    
    <section>
      <name>Solution Overview  </name>
      <figure anchor="fig-architecture">
        <name>Typical workflow of ADOL</name>
        <artset>
        <!-- This <artset> includes two <artwork> elements, each of a different type -->
          <artwork type="ascii-art" name="stream.txt">

     +--------------------------------------------------------+
     |         Agentic Data Optimization Layer (ADOL)         |
     +--------------------------------------------------------+

+--------+               +---------------+               +-----------+
|        |               |               |               |           |
|  LLM   |               |    Client     | B&amp;D.Tool/list |  Server   |
|        |               |               +-------------->+           |
|        |               |               | --short --tag |           |
|        |               |               |               |           |
|        | Curated List  |               |A.Curated List |           |
|        &lt;---------------+               &lt;---------------+           |
|        |               |               |               |           |
|        |   Tool Call   |               |   Tool Call   |           |
|        +-------------->+               +-------------->+           |
|        |C.requireOutput|               |               |           |
|        |               |               |               |           |
|        |    Response   |               |    Response   |           |
|        &lt;---------------+ requireOutput &lt;---------------+           |
|        |   (Filtered)  |               |               |           |
+--------+               +---------------+               +-----------+

          </artwork>
        </artset>
      </figure>
      <t>To systematically address the inefficiencies identified above, this draft introduces the Agentic Data Optimization Layer (ADOL) -- 
        a unified and backward-compatible enhancement to existing agentic data layers. Rather than focusing on a single protocol, ADOL 
        provides a generalized framework that improves the efficiency of data exchange across diverse agent communication settings, 
        including both Model Context Protocol (MCP) and Agent-to-Agent (A2A) workflows. </t>
      <t>ADOL defines a set of lightweight mechanisms that preserve interoperability while significantly reducing 
        redundant token usage in LLM-driven interactions. It optimizes data compactness and adaptiveness without 
        altering existing transport or semantic layers. As displayed in <xref target="fig-architecture"/>, ADOL 
        enhances the agentic data layer along four complementary dimensions: </t>
      <ol type="A">
        <li>Schema Deduplication: eliminate repetitive definitions through JSON references;</li>
        <li>Adaptive Optional Inclusion: allow clients to request concise or full schemas; </li>
        <li>Controllable Response Verbosity: enable models to specify the required output scope; </li>
        <li>Context-Aware Tool Selection: limit tool exposure to those relevant to the current task. </li>
      </ol>
      <t>Together, these mechanisms establish a more efficient and extensible foundation for agentic communication, reducing token and context bloat while maintaining full compatibility with existing agent frameworks. </t>
      <section>
        <name>Schema Deduplication Using JSON References </name>
        <t>To eliminate redundant schema definitions, MCP tool schemas may enable the JSON `$ref` mechanism. This mechanism was not supported in the original MCP architecture, which consequently contains a significant amount of duplicate content. The JSON `$ref` mechanism supports both internal and external references, as can be seen in <xref target="JSON"/>. </t>
        <figure>
        <name>Example: Internal Reference</name>
        <sourcecode name="" type="json" markers="false">
          <![CDATA[
{
  "definitions": {
    "user": { "type": "object" }
  },
  "properties": {
    "data": { "$ref": "#/definitions/user" }
  }
}
          ]]>
        </sourcecode>
        <!-- [CHECK] markers="true" means that the rendered file will have <CODE BEGINS> and <CODE ENDS> added -->
      </figure>
      <t>The internal reference case enables schema reuse within the same file, which applies to situations where there are multiple duplicate definitions within the same JSON schema.   </t>
      <figure>
        <name>Example: External Reference</name>
        <sourcecode name="" type="json" markers="false">
          <![CDATA[
{
  "$ref": "schema/common/user.json"
}
          ]]>
        </sourcecode>
        <!-- [CHECK] markers="true" means that the rendered file will have <CODE BEGINS> and <CODE ENDS> added -->
      </figure>
      <t>The external reference allows a tool schema to reference a shared template stored either locally or remotely. The MCP host or client may cache these schemas to avoid repeated token consumption. This mechanism is more suitable for situations where there is overlapping content among multiple tool schemas. We have conducted extensive experiments where the tool schemas in the server are modified by using $ref references. The results show that the LLM can still accurately recognize and invoke these tools, introducing no backward incompatibility.  </t>
      </section>
      <section>
        <name>Adaptive Optional Field Inclusion </name>
        <t>When responding to `tool/list` requests, servers SHOULD determine whether to include optional schema content based on the client request. Specifically, an additional field `optional` is introduced in the server's tool configuration file, which contains schema content that is not mandatory. If the client requests a “short” list (for example, via `tool/list --short`), the server SHOULD omit unnecessary optional contents included in the `optional` field and return a concise tool schema. This conditional inclusion mechanism prevents unnecessary token usage.  </t>
      </section>
      <section>
        <name>Controllable Response Verbosity </name>
        <t>The client SHOULD be capable of adjusting response verbosity based on the LLM’s request. To address this, ADOL introduces a new `requireOutput` field in the tool schema.  </t>
        <figure>
        <name>RequireOutput Schema</name>
        <sourcecode name="" type="json" markers="false">
          <![CDATA[
{
  "requireOutput": {
    "description": "A list of output field names that the LLM expects to be returned from the tool's complete output.",
    "type": "array",    
    "items": {
      "type": "string",
      "enum": ["toolOutcome_1", "toolOutcome_2", "toolOutcome_n"]
    },
    "uniqueItems": true
  }
}
          ]]>
        </sourcecode>
        <!-- [CHECK] markers="true" means that the rendered file will have <CODE BEGINS> and <CODE ENDS> added -->
      </figure>
      <t>When invoking a tool, the LLM first retrieves the tool list and corresponding schemas through the client. Each tool schema may include a `requireOutput` field, which defines an optional list of output fields that can be selectively requested. This field enables the LLM to specify which parts of a tool’s output are relevant to the current context. Based on this field, the LLM infers which elements of the tool’s defined enumerated outcome are needed and passes the generated `requireOutput` schema to the MCP client. Upon receiving the tool’s execution result, the MCP client filters the output according to `requireOutput` and returns the trimmed result to the LLM.
For example, in a weather-related tool, the output schema may contain multiple fields such as current conditions, temperature, humidity, and visibility. The `requireOutput` allows the client to specify which subset of these results should be included (e.g., `"requireOutput": ["temperature", "humidity"]`). After receiving the server’s response, the client extracts and forwards only these fields to the LLM, filtering out unnecessary content while preserving semantic completeness.
This design ensures that the LLM only receives the necessary data for the current context, thereby reducing redundant information and minimizing token consumption.
  </t>
      </section>
      <section>
        <name>Tool Selection Mechanism </name>
        <t>To alleviate the token overhead caused by large tool lists, a retrieval-based mechanism is introduced. Generally, the MCP client may include an additional informative field in the `tool/list` request to indicate its requirements for tools. The server then filters and returns only the tools that meet the need. We propose two typical implementation approaches: </t>
        <ol>
          <li>Semantic-based retrieval: The client provides a natural language description of its requirement. The server computes an embedding for the requirement and compares it with precomputed embeddings of all available tools. Then, the server ranks the tools based on similarity and returns either the top-K most relevant tools or those whose similarity score exceeds a predefined threshold.  </li>
          <li>Tag-based retrieval: The client provides one or more predefined tool tags. Each tool is assigned at least one tag. The server returns the tools whose tags match the ones requested by the client.   </li>
        </ol>
        <t>These two approaches can accommodate servers with different capabilities. Approach 1 requires a more sophisticated server implementation, but it is expected to provide better retrieval performance and higher flexibility. In contrast, Approach 2 is simpler to implement, though it introduces some usage limitations.  </t>
        <t>By tool selection mechanism, the range of tools selected by the LLM is narrowed down, thereby reducing the token overhead of the LLM and improving the accuracy of tool selection. </t>
      </section>
    </section>
    <section>
      <name>Specification </name>
      <t>The proposed extensions can be summarized as follows: </t>
      <table>
        <name>Mechanism Summary</name>
        <thead>
          <tr>
            <th>Mechanism</th>
            <th>Description</th>
            <th>Impact</th>
          </tr>
        </thead>
        <tbody>
          <tr>
            <td>JSON $ref support</td>
            <td>Deduplicate schema content</td>
            <td>Reduce token usage</td>
          </tr>
          <tr>
            <td>Optional field control</td>
            <td>Client-specified schema inclusion</td>
            <td>Reduce token usage</td>
          </tr>
          <tr>
            <td>Response Verbosity control</td>
            <td>Adaptive detail level in output</td>
            <td>Reduce token usage</td>
          </tr>
          <tr>
            <td>Retrieval-based tool selection</td>
            <td>Vectorized tool selection</td>
            <td>Improve accuracy</td>
          </tr>
        </tbody>
      </table>
      <t>Backward compatibility is maintained, as none of the proposed changes modify existing MCP schema semantics. </t>
    </section>

    
    <section anchor="IANA">
    <!-- All drafts are required to have an IANA considerations section. See RFC 8126 for a guide.-->
      <name>IANA Considerations</name>
      <t>This memo includes no request to IANA. </t>
    </section>
    
    <section anchor="Security">
      <!-- All drafts are required to have a security considerations section. See RFC 3552 for a guide. -->
      <name>Security Considerations</name>
      <t>Embedding-based retrieval introduces potential information exposure risks if tool embeddings or client embeddings are transmitted in plaintext. Implementations SHOULD ensure encryption and access control when exchanging embedding vectors. Schema caching and external references should only reference trusted sources to avoid code injection or schema poisoning.</t>
      <t>In addition to this, ADOL does not introduce new security vulnerabilities beyond those already present in existing agent communication protocols such as MCP or A2A. However, implementers should ensure that any schema references or external resources retrieved via $ref are obtained from trusted sources to prevent schema injection or data tampering. </t>
    </section>
    
    <!-- NOTE: The Acknowledgements and Contributors sections are at the end of this template -->
  </middle>

  <back>
    <references>
      <name>References</name>
      <references>
        <name>Normative References</name>
        
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.2119.xml"/>
        <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8174.xml"/>
        <!-- The recommended and simplest way to include a well known reference -->
        
      </references>
 
      <references>
        <name>Informative References</name>

        <reference anchor="A2A" target="https://a2a-protocol.org/latest">
        <!-- [REPLACE/DELETE] Example reference written by an organization not a person -->
          <front>
            <title>Agent2Agent(A2A) Protocol</title>
            <author>
              <organization>Google</organization>
            </author>
            <date year="2025"/>
            <!-- [CHECK] -->
          </front>
        </reference>       

        <reference anchor="MCP" target="https://modelcontextprotocol.io/docs/getting-started/intro">
        <!-- [REPLACE/DELETE] Example reference written by an organization not a person -->
          <front>
            <title>Model Context Protocol (MCP)</title>
            <author>
              <organization>Anthropic</organization>
            </author>
            <date year="2025"/>
            <!-- [CHECK] -->
          </front>
        </reference>

        <reference anchor="JSON">
        <!-- [REPLACE/DELETE] Example minimum reference -->
          <front>
            <title>JSON Schema: A Media Type for Describing JSON Documents</title>
            <author initials="A." surname="Wright">
              <organization/>
            </author>
            <author initials="H." surname="Andrews">
              <organization/>
            </author>
            <author initials="B." surname="Hutton">
              <organization/>
            </author>
            <author initials="G." surname="Dennis">
              <organization/>
            </author>
            <date year="2022"/>
            <!-- [CHECK] -->
          </front>
        </reference>       

      </references>
    </references>
    

    <section anchor="Acknowledgements" numbered="false">
      <!-- [REPLACE/DELETE] an Acknowledgements section is optional -->
      <name>Acknowledgements</name>
      <t>The authors thank the MCP open-source community for the ongoing discussion and the SEP authors for foundational design insights. The SEP we proposed in the official MCP GitHub community is available at <eref target="https://github.com/modelcontextprotocol/modelcontextprotocol/issues/1576"/>. </t>
    </section>
    

    
 </back>
</rfc>
