<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
  <channel>
    <title>Speech on Fahim Dalvi</title>
    <link>https://fdalvi.github.io/tags/speech/</link>
    <description>Recent content in Speech on Fahim Dalvi</description>
    <generator>Hugo</generator>
    <language>en</language>
    <lastBuildDate>Fri, 15 Aug 2025 13:00:00 +0300</lastBuildDate>
    <atom:link href="https://fdalvi.github.io/tags/speech/index.xml" rel="self" type="application/rss+xml" />
    <item>
      <title>Paper Accepted at Interspeech 2025!</title>
      <link>https://fdalvi.github.io/blog/2025-08-15-from-words-to-waves-interspeech-2025/</link>
      <pubDate>Fri, 15 Aug 2025 13:00:00 +0300</pubDate>
      <guid>https://fdalvi.github.io/blog/2025-08-15-from-words-to-waves-interspeech-2025/</guid>
      <description>&lt;p&gt;Our paper &lt;a href=&#34;https://doi.org/10.21437/Interspeech.2025-2180&#34;&gt;From Words to Waves: Analyzing Concept Formation in Speech and Text-Based Foundation Models&lt;/a&gt; has been accepted at &lt;a href=&#34;https://interspeech2025.org/&#34;&gt;Interspeech 2025&lt;/a&gt;.&lt;/p&gt;&#xA;&lt;p&gt;LLMs have shown that text-only training can give models remarkable reasoning abilities and abstract semantic understanding. This raises a fascinating question: &lt;strong&gt;do speech models develop similar conceptual structures when trained only on audio?&lt;/strong&gt; And when models are trained on both speech and text together, do they build a richer understanding?&lt;/p&gt;&#xA;&lt;p&gt;We used &lt;strong&gt;Latent Concept Analysis&lt;/strong&gt; from our prior work on interpretability to examine how semantic abstractions form across modalities, and find lots of interesting differences on how speech and text modalities differ in their internal representations. We released our code and a curated audio version of the SST-2 dataset on &lt;a href=&#34;https://github.com/shammur/MultimodalXplain&#34;&gt;GitHub&lt;/a&gt; and &lt;a href=&#34;https://huggingface.co/collections/QCRI/multimodalxplain&#34;&gt;Hugging Face&lt;/a&gt; to support reproducibility.&lt;/p&gt;</description>
    </item>
  </channel>
</rss>
