{ "cells": [ { "cell_type": "raw", "metadata": { "raw_mimetype": "text/restructuredtext" }, "source": [ "***************\n", "Temporal Models\n", "***************" ] }, { "cell_type": "raw", "metadata": { "raw_mimetype": "text/restructuredtext" }, "source": [ ".. _prince2012computer-ex-19.1:\n", "\n", "Exercise 19.1\n", "=============\n", "\n", "Suppose\n", "\n", ".. math::\n", "\n", " \\DeclareMathOperator{\\NormDist}{Norm}\n", " Pr(\\mathbf{w}_{t - 1} \\mid \\mathbf{x}_{1 \\ldots t - 1}) =\n", " \\NormDist_{\\mathbf{w}_{t - 1}}\\left[\n", " \\boldsymbol{\\mu}_{t - 1}, \\boldsymbol{\\Sigma}_{t - 1}\n", " \\right].\n", "\n", ".. math::\n", "\n", " Pr(\\mathbf{w}_t \\mid \\mathbf{x}_{1 \\ldots t - 1})\n", " &= \\int\n", " Pr(\\mathbf{w}_t, \\mathbf{w}_{t - 1} \\mid \\mathbf{x}_{1 \\ldots t - 1})\n", " d\\mathbf{w}_{t - 1}\n", " & \\quad & \\text{(2.1)}\\\\\n", " &= \\int Pr(\\mathbf{w}_t \\mid \\mathbf{w}_{t - 1})\n", " Pr(\\mathbf{w}_{t - 1} \\mid \\mathbf{x}_{1 \\ldots t - 1})\n", " d\\mathbf{w}_{t - 1}\n", " & \\quad & \\text{Markov assumption}\\\\\n", " &= \\int \\NormDist_{\\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}_p + \\boldsymbol{\\Psi} \\mathbf{w}_{t - 1},\n", " \\boldsymbol{\\Sigma}_p\n", " \\right]\n", " \\NormDist_{\\mathbf{w}_{t - 1}}\\left[\n", " \\boldsymbol{\\mu}_{t - 1}, \\boldsymbol{\\Sigma}_{t - 1}\n", " \\right]\n", " d\\mathbf{w}_{t - 1}\n", " & \\quad & \\text{(19.6)}\\\\\n", " &= \\kappa_1 \\kappa_2\n", " \\int \\NormDist_{\\mathbf{w}_{t - 1}}\\left[\n", " \\boldsymbol{\\mu}'', \\boldsymbol{\\Sigma}''\n", " \\right]\n", " d\\mathbf{w}_{t - 1}\n", " & \\quad & \\text{(a), (b)}\\\\\n", " &= \\NormDist_{\\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}_p + \\boldsymbol{\\Psi} \\boldsymbol{\\mu}_{t - 1},\n", " \\boldsymbol{\\Sigma}_p +\n", " \\boldsymbol{\\Psi} \\boldsymbol{\\Sigma}_{t - 1} \\boldsymbol{\\Psi}^\\top\n", " \\right]\n", " & \\quad & \\text{(c)}\\\\\n", " &= \\NormDist_{\\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}_+, 
\\boldsymbol{\\Sigma}_+\n", " \\right]\n", "\n", "(a)\n", "---\n", "\n", "By :ref:`Exercise 5.10 `,\n", "\n", ".. math::\n", "\n", " \\NormDist_{\\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}_p + \\boldsymbol{\\Psi} \\mathbf{w}_{t - 1},\n", " \\boldsymbol{\\Sigma}_p\n", " \\right] =\n", " \\kappa_1 \\NormDist_{\\mathbf{w}_{t - 1}}\\left[\n", " \\boldsymbol{\\mu}' + \\boldsymbol{\\Psi}' \\mathbf{w}_t,\n", " \\boldsymbol{\\Sigma}'\n", " \\right]\n", "\n", "where\n", "\n", ".. math::\n", "\n", " \\boldsymbol{\\Sigma}'\n", " &= (\\boldsymbol{\\Psi}^\\top \\boldsymbol{\\Sigma}_p^{-1}\n", " \\boldsymbol{\\Psi})^{-1}\n", " \\\\\\\\\n", " \\boldsymbol{\\Psi}'\n", " &= \\boldsymbol{\\Sigma}' \\boldsymbol{\\Psi}^\\top \\boldsymbol{\\Sigma}_p^{-1}\n", " \\\\\\\\\n", " \\boldsymbol{\\mu}'\n", " &= -\\boldsymbol{\\Sigma}' \\boldsymbol{\\Psi}^\\top \\boldsymbol{\\Sigma}_p^{-1}\n", " \\boldsymbol{\\mu}_p\n", " \\\\\\\\\n", " \\kappa_1\n", " &= \\frac{\n", " \\left\\vert \\boldsymbol{\\Sigma}' \\right\\vert^{1 / 2}\n", " }{\n", " \\left\\vert \\boldsymbol{\\Sigma}_p \\right\\vert^{1 / 2}\n", " }\n", " \\exp\\left[\n", " -0.5\n", " (\\mathbf{w}_t - \\boldsymbol{\\mu}_p)^\\top\n", " \\left(\n", " \\boldsymbol{\\Sigma}_p^{-1} -\n", " \\boldsymbol{\\Sigma}_p^{-1} \\boldsymbol{\\Psi} \\boldsymbol{\\Sigma}'\n", " \\boldsymbol{\\Psi}^\\top \\boldsymbol{\\Sigma}_p^{-1}\n", " \\right)\n", " (\\mathbf{w}_t - \\boldsymbol{\\mu}_p)\n", " \\right].\n", "\n", "(b)\n", "---\n", "\n", "By :ref:`Exercise 5.7 ` and\n", ":ref:`Exercise 5.9 `,\n", "\n", ".. 
math::\n", "\n", " \\kappa_1 \\NormDist_{\\mathbf{w}_{t - 1}}\\left[\n", " \\boldsymbol{\\mu}' + \\boldsymbol{\\Psi}' \\mathbf{w}_t,\n", " \\boldsymbol{\\Sigma}'\n", " \\right]\n", " \\NormDist_{\\mathbf{w}_{t - 1}}\\left[\n", " \\boldsymbol{\\mu}_{t - 1}, \\boldsymbol{\\Sigma}_{t - 1}\n", " \\right] =\n", " \\kappa_1 \\kappa_2 \\NormDist_{\\mathbf{w}_{t - 1}}\\left[\n", " \\boldsymbol{\\mu}'', \\boldsymbol{\\Sigma}''\n", " \\right]\n", "\n", "where\n", "\n", ".. math::\n", "\n", " \\boldsymbol{\\Sigma}''\n", " &= \\left(\n", " {\\boldsymbol{\\Sigma}'}^{-1} + \\boldsymbol{\\Sigma}_{t - 1}^{-1}\n", " \\right)^{-1}\n", " \\\\\\\\\n", " \\boldsymbol{\\mu}''\n", " &= \\boldsymbol{\\Sigma}''\n", " \\left(\n", " {\\boldsymbol{\\Sigma}'}^{-1}\n", " \\left( \\boldsymbol{\\mu}' + \\boldsymbol{\\Psi}' \\mathbf{w}_t \\right) +\n", " \\boldsymbol{\\Sigma}_{t - 1}^{-1} \\boldsymbol{\\mu}_{t - 1}\n", " \\right)\n", " \\\\\\\\\n", " \\kappa_2\n", " &= \\NormDist_{\\boldsymbol{\\mu}' + \\boldsymbol{\\Psi}' \\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}_{t - 1},\n", " \\boldsymbol{\\Sigma}' + \\boldsymbol{\\Sigma}_{t - 1}\n", " \\right].\n", "\n", "(c)\n", "---\n", "\n", ".. 
math::\n", "\n", " & \\kappa_1 \\kappa_2\\\\\n", " &= \\frac{\n", " \\left\\vert \\boldsymbol{\\Sigma}' \\right\\vert^{1 / 2}\n", " }{\n", " \\left\\vert \\boldsymbol{\\Sigma}_p \\right\\vert^{1 / 2}\n", " }\n", " \\exp\\left[\n", " (\\mathbf{w}_t - \\boldsymbol{\\mu}_p)^\\top\n", " \\left(\n", " \\boldsymbol{\\Sigma}_p^{-1} -\n", " \\boldsymbol{\\Sigma}_p^{-1} \\boldsymbol{\\Psi} \\boldsymbol{\\Sigma}'\n", " \\boldsymbol{\\Psi}^\\top \\boldsymbol{\\Sigma}_p^{-1}\n", " \\right)\n", " (\\mathbf{w}_t - \\boldsymbol{\\mu}_p)\n", " \\right]^{-0.5}\n", " \\NormDist_{\\boldsymbol{\\mu}' + \\boldsymbol{\\Psi}' \\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}_{t - 1},\n", " \\boldsymbol{\\Sigma}' + \\boldsymbol{\\Sigma}_{t - 1}\n", " \\right]\\\\\n", " &= \\frac{\n", " \\left\\vert \\boldsymbol{\\Sigma}' \\right\\vert^{1 / 2}\n", " \\exp\\left[\n", " (\\mathbf{w}_t - \\boldsymbol{\\mu}_p)^\\top\n", " \\left(\n", " \\boldsymbol{\\Sigma}_p^{-1} -\n", " \\boldsymbol{\\Sigma}_p^{-1} \\boldsymbol{\\Psi} \\boldsymbol{\\Sigma}'\n", " \\boldsymbol{\\Psi}^\\top \\boldsymbol{\\Sigma}_p^{-1}\n", " \\right)\n", " (\\mathbf{w}_t - \\boldsymbol{\\mu}_p) +\n", " \\left(\n", " \\boldsymbol{\\mu}' + \\boldsymbol{\\Psi}' \\mathbf{w}_t -\n", " \\boldsymbol{\\mu}_{t - 1}\n", " \\right)^\\top\n", " \\left(\n", " \\boldsymbol{\\Sigma}' + \\boldsymbol{\\Sigma}_{t - 1}\n", " \\right)^{-1}\n", " \\left(\n", " \\boldsymbol{\\mu}' + \\boldsymbol{\\Psi}' \\mathbf{w}_t -\n", " \\boldsymbol{\\mu}_{t - 1}\n", " \\right)\n", " \\right]^{-0.5}\n", " }{\n", " (2 \\pi)^{D / 2}\n", " \\left\\vert \\boldsymbol{\\Sigma}_p \\right\\vert^{1 / 2}\n", " \\left\\vert\n", " \\boldsymbol{\\Sigma}' + \\boldsymbol{\\Sigma}_{t - 1}\n", " \\right\\vert^{1 / 2}\n", " }\\\\\n", " &= \\frac{1}{\n", " (2 \\pi)^{D / 2}\n", " \\left\\vert\n", " \\boldsymbol{\\Sigma}_p +\n", " \\boldsymbol{\\Psi} \\boldsymbol{\\Sigma}_{t - 1} \\boldsymbol{\\Psi}^\\top\n", " \\right\\vert^{1 / 2}\n", " }\n", " \\exp\\left[\n", " \\left(\n", " 
\\mathbf{w}_t -\n", " \\boldsymbol{\\mu}_p - \\boldsymbol{\\Psi} \\boldsymbol{\\mu}_{t - 1}\n", " \\right)^\\top\n", " \\left(\n", " \\boldsymbol{\\Sigma}_p +\n", " \\boldsymbol{\\Psi} \\boldsymbol{\\Sigma}_{t - 1} \\boldsymbol{\\Psi}^\\top\n", " \\right)^{-1}\n", " \\left(\n", " \\mathbf{w}_t -\n", " \\boldsymbol{\\mu}_p - \\boldsymbol{\\Psi} \\boldsymbol{\\mu}_{t - 1}\n", " \\right)\n", " \\right]^{-0.5}\n", " & \\quad & \\text{(c.1), (c.2)}\n", "\n", "(c.1)\n", "-----\n", "\n", ".. math::\n", "\n", " \\frac{\n", " \\left\\vert \\boldsymbol{\\Sigma}' \\right\\vert^{1 / 2}\n", " }{\n", " \\left\\vert \\boldsymbol{\\Sigma}_p \\right\\vert^{1 / 2}\n", " \\left\\vert\n", " \\boldsymbol{\\Sigma}' + \\boldsymbol{\\Sigma}_{t - 1}\n", " \\right\\vert^{1 / 2}\n", " }\n", " &= \\left(\n", " \\left\\vert\n", " \\boldsymbol{\\Psi}^\\top \\boldsymbol{\\Sigma}_p^{-1} \\boldsymbol{\\Psi}\n", " \\right\\vert\n", " \\left\\vert \\boldsymbol{\\Sigma}_p \\right\\vert\n", " \\left\\vert\n", " \\left(\n", " \\boldsymbol{\\Psi}^\\top \\boldsymbol{\\Sigma}_p^{-1} \\boldsymbol{\\Psi}\n", " \\right)^{-1} + \\boldsymbol{\\Sigma}_{t - 1}\n", " \\right\\vert\n", " \\right)^{-1 / 2}\n", " & \\quad & \\text{(C.11)}\\\\\n", " &= \\left(\n", " \\left\\vert \\boldsymbol{\\Sigma}_p \\right\\vert\n", " \\left\\vert\n", " \\mathbf{I} +\n", " \\left(\n", " \\boldsymbol{\\Psi}^\\top \\boldsymbol{\\Sigma}_p^{-1} \\boldsymbol{\\Psi}\n", " \\right) \\boldsymbol{\\Sigma}_{t - 1}\n", " \\right\\vert\n", " \\right)^{-1 / 2}\n", " & \\quad & \\text{(C.10)}\\\\\n", " &= \\left(\n", " \\left\\vert \\boldsymbol{\\Sigma}_p \\right\\vert\n", " \\left\\vert\n", " \\mathbf{I} +\n", " \\boldsymbol{\\Psi} \\boldsymbol{\\Sigma}_{t - 1}\n", " \\boldsymbol{\\Psi}^\\top \\boldsymbol{\\Sigma}_p^{-1}\n", " \\right\\vert\n", " \\right)^{-1 / 2}\n", " & \\quad & \\text{Sylvester's Determinant Theorem}\\\\\n", " &= \\left\\vert\n", " \\boldsymbol{\\Sigma}_p +\n", " \\boldsymbol{\\Psi} \\boldsymbol{\\Sigma}_{t - 1} 
\\boldsymbol{\\Psi}^\\top\n", " \\right\\vert^{-1 / 2}\n", " & \\quad & \\text{(C.10)}\n", "\n", "(c.2)\n", "-----\n", "\n", ".. math::\n", "\n", " & \\exp\\left[\n", " (\\mathbf{w}_t - \\boldsymbol{\\mu}_p)^\\top\n", " \\left(\n", " \\boldsymbol{\\Sigma}_p^{-1} -\n", " \\boldsymbol{\\Sigma}_p^{-1} \\boldsymbol{\\Psi} \\boldsymbol{\\Sigma}'\n", " \\boldsymbol{\\Psi}^\\top \\boldsymbol{\\Sigma}_p^{-1}\n", " \\right)\n", " (\\mathbf{w}_t - \\boldsymbol{\\mu}_p) +\n", " \\left(\n", " \\boldsymbol{\\mu}' + \\boldsymbol{\\Psi}' \\mathbf{w}_t -\n", " \\boldsymbol{\\mu}_{t - 1}\n", " \\right)^\\top\n", " \\left(\n", " \\boldsymbol{\\Sigma}' + \\boldsymbol{\\Sigma}_{t - 1}\n", " \\right)^{-1}\n", " \\left(\n", " \\boldsymbol{\\mu}' + \\boldsymbol{\\Psi}' \\mathbf{w}_t -\n", " \\boldsymbol{\\mu}_{t - 1}\n", " \\right)\n", " \\right]^{-0.5}\\\\\n", " &= \\exp\\left[\n", " (\\mathbf{w}_t - \\boldsymbol{\\mu}_p)^\\top\n", " \\left(\n", " \\boldsymbol{\\Sigma}_p^{-1} -\n", " \\boldsymbol{\\Sigma}_p^{-1} \\boldsymbol{\\Psi} \\boldsymbol{\\Sigma}'\n", " \\boldsymbol{\\Psi}^\\top \\boldsymbol{\\Sigma}_p^{-1}\n", " \\right)\n", " (\\mathbf{w}_t - \\boldsymbol{\\mu}_p) +\n", " \\left(\n", " \\boldsymbol{\\mu}' + \\boldsymbol{\\Psi}' \\mathbf{w}_t -\n", " \\boldsymbol{\\mu}_{t - 1}\n", " \\right)^\\top\n", " \\left(\n", " \\boldsymbol{\\Sigma}' + \\boldsymbol{\\Sigma}_{t - 1}\n", " \\right)^{-1}\n", " \\left(\n", " \\boldsymbol{\\mu}' + \\boldsymbol{\\Psi}' \\mathbf{w}_t -\n", " \\boldsymbol{\\mu}_{t - 1}\n", " \\right)\n", " \\right]^{-0.5}\\\\\n", " &= \\exp\\left[\n", " \\left(\n", " \\mathbf{w}_t -\n", " \\boldsymbol{\\mu}_p - \\boldsymbol{\\Psi} \\boldsymbol{\\mu}_{t - 1}\n", " \\right)^\\top\n", " \\left(\n", " \\boldsymbol{\\Sigma}_p^{-1} -\n", " \\boldsymbol{\\Sigma}_p^{-1} \\boldsymbol{\\Psi}\n", " \\left(\n", " \\boldsymbol{\\Sigma}_{t - 1}^{-1} +\n", " \\boldsymbol{\\Psi}^\\top\n", " \\boldsymbol{\\Sigma}_p^{-1}\n", " \\boldsymbol{\\Psi}\n", " \\right)^{-1}\n", " 
\\boldsymbol{\\Psi}^\\top \\boldsymbol{\\Sigma}_p^{-1}\n", " \\right)\n", " \\left(\n", " \\mathbf{w}_t -\n", " \\boldsymbol{\\mu}_p - \\boldsymbol{\\Psi} \\boldsymbol{\\mu}_{t - 1}\n", " \\right)\n", " \\right]^{-0.5}\n", " & \\quad & \\text{(c.3)}\\\\\n", " &= \\exp\\left[\n", " \\left(\n", " \\mathbf{w}_t -\n", " \\boldsymbol{\\mu}_p - \\boldsymbol{\\Psi} \\boldsymbol{\\mu}_{t - 1}\n", " \\right)^\\top\n", " \\left(\n", " \\boldsymbol{\\Sigma}_p +\n", " \\boldsymbol{\\Psi} \\boldsymbol{\\Sigma}_{t - 1} \\boldsymbol{\\Psi}^\\top\n", " \\right)^{-1}\n", " \\left(\n", " \\mathbf{w}_t -\n", " \\boldsymbol{\\mu}_p - \\boldsymbol{\\Psi} \\boldsymbol{\\mu}_{t - 1}\n", " \\right)\n", " \\right]^{-0.5}\n", " & \\quad & \\text{(C.61)}\n", "\n", "(c.3)\n", "-----\n", "\n", "Notice that the summands can be decomposed into\n", "\n", ".. math::\n", "\n", " & (\\mathbf{w}_t - \\boldsymbol{\\mu}_p)^\\top\n", " \\left(\n", " \\boldsymbol{\\Sigma}_p^{-1} -\n", " \\boldsymbol{\\Sigma}_p^{-1} \\boldsymbol{\\Psi} \\boldsymbol{\\Sigma}'\n", " \\boldsymbol{\\Psi}^\\top \\boldsymbol{\\Sigma}_p^{-1}\n", " \\right)\n", " (\\mathbf{w}_t - \\boldsymbol{\\mu}_p)\\\\\n", " &= (\\mathbf{w}_t - \\boldsymbol{\\mu}_p)^\\top\n", " \\boldsymbol{\\Sigma}_p^{-1}\n", " (\\mathbf{w}_t - \\boldsymbol{\\mu}_p) -\n", " (\\mathbf{w}_t - \\boldsymbol{\\mu}_p)^\\top\n", " \\boldsymbol{\\Sigma}_p^{-1} \\boldsymbol{\\Psi} \\boldsymbol{\\Sigma}'\n", " \\boldsymbol{\\Psi}^\\top \\boldsymbol{\\Sigma}_p^{-1}\n", " (\\mathbf{w}_t - \\boldsymbol{\\mu}_p)\\\\\n", " &= (\\mathbf{w}_t - \\boldsymbol{\\mu}_p)^\\top\n", " \\boldsymbol{\\Sigma}_p^{-1}\n", " (\\mathbf{w}_t - \\boldsymbol{\\mu}_p) -\n", " (\\boldsymbol{\\Psi}' \\mathbf{w}_t + \\boldsymbol{\\mu}')^\\top\n", " {\\boldsymbol{\\Sigma}'}^{-1}\n", " (\\boldsymbol{\\Psi}' \\mathbf{w}_t + \\boldsymbol{\\mu}')\n", "\n", "and\n", "\n", "..
math::\n", "\n", " & \\left(\n", " \\boldsymbol{\\mu}' + \\boldsymbol{\\Psi}' \\mathbf{w}_t -\n", " \\boldsymbol{\\mu}_{t - 1}\n", " \\right)^\\top\n", " \\left(\n", " \\boldsymbol{\\Sigma}' + \\boldsymbol{\\Sigma}_{t - 1}\n", " \\right)^{-1}\n", " \\left(\n", " \\boldsymbol{\\mu}' + \\boldsymbol{\\Psi}' \\mathbf{w}_t -\n", " \\boldsymbol{\\mu}_{t - 1}\n", " \\right)\\\\\n", " &= \\left(\n", " \\boldsymbol{\\mu}' + \\boldsymbol{\\Psi}' \\mathbf{w}_t -\n", " \\boldsymbol{\\mu}_{t - 1}\n", " \\right)^\\top\n", " \\left(\n", " {\\boldsymbol{\\Sigma}'}^{-1} -\n", " {\\boldsymbol{\\Sigma}'}^{-1}\n", " \\left(\n", " \\boldsymbol{\\Sigma}_{t - 1}^{-1} +\n", " \\boldsymbol{\\Psi}^\\top \\boldsymbol{\\Sigma}_p^{-1} \\boldsymbol{\\Psi}\n", " \\right)^{-1}\n", " {\\boldsymbol{\\Sigma}'}^{-1}\n", " \\right)\n", " \\left(\n", " \\boldsymbol{\\mu}' + \\boldsymbol{\\Psi}' \\mathbf{w}_t -\n", " \\boldsymbol{\\mu}_{t - 1}\n", " \\right)\n", " & \\quad & \\text{(c.4)}\\\\\n", " &= \\left(\n", " \\boldsymbol{\\mu}' + \\boldsymbol{\\Psi}' \\mathbf{w}_t -\n", " \\boldsymbol{\\mu}_{t - 1}\n", " \\right)^\\top\n", " {\\boldsymbol{\\Sigma}'}^{-1}\n", " \\left(\n", " \\boldsymbol{\\mu}' + \\boldsymbol{\\Psi}' \\mathbf{w}_t -\n", " \\boldsymbol{\\mu}_{t - 1}\n", " \\right) -\n", " \\left(\n", " \\boldsymbol{\\mu}' + \\boldsymbol{\\Psi}' \\mathbf{w}_t -\n", " \\boldsymbol{\\mu}_{t - 1}\n", " \\right)^\\top\n", " {\\boldsymbol{\\Sigma}'}^{-1}\n", " \\left(\n", " \\boldsymbol{\\Sigma}_{t - 1}^{-1} +\n", " \\boldsymbol{\\Psi}^\\top \\boldsymbol{\\Sigma}_p^{-1} \\boldsymbol{\\Psi}\n", " \\right)^{-1}\n", " {\\boldsymbol{\\Sigma}'}^{-1}\n", " \\left(\n", " \\boldsymbol{\\mu}' + \\boldsymbol{\\Psi}' \\mathbf{w}_t -\n", " \\boldsymbol{\\mu}_{t - 1}\n", " \\right)\\\\\n", " &= \\left(\n", " \\boldsymbol{\\mu}' + \\boldsymbol{\\Psi}' \\mathbf{w}_t -\n", " \\boldsymbol{\\mu}_{t - 1}\n", " \\right)^\\top\n", " {\\boldsymbol{\\Sigma}'}^{-1}\n", " \\left(\n", " \\boldsymbol{\\mu}' + \\boldsymbol{\\Psi}' 
\\mathbf{w}_t -\n", " \\boldsymbol{\\mu}_{t - 1}\n", " \\right) -\n", " \\left(\n", " \\mathbf{w}_t - \\boldsymbol{\\mu}_p -\n", " \\boldsymbol{\\Psi} \\boldsymbol{\\mu}_{t - 1}\n", " \\right)^\\top\n", " \\boldsymbol{\\Sigma}_p^{-1} \\boldsymbol{\\Psi}\n", " \\left(\n", " \\boldsymbol{\\Sigma}_{t - 1}^{-1} +\n", " \\boldsymbol{\\Psi}^\\top \\boldsymbol{\\Sigma}_p^{-1} \\boldsymbol{\\Psi}\n", " \\right)^{-1}\n", " \\boldsymbol{\\Psi}^\\top \\boldsymbol{\\Sigma}_p^{-1}\n", " \\left(\n", " \\mathbf{w}_t - \\boldsymbol{\\mu}_p -\n", " \\boldsymbol{\\Psi} \\boldsymbol{\\mu}_{t - 1}\n", " \\right).\n", "\n", "Since\n", "\n", ".. math::\n", "\n", " & \\left(\n", " \\boldsymbol{\\mu}' + \\boldsymbol{\\Psi}' \\mathbf{w}_t -\n", " \\boldsymbol{\\mu}_{t - 1}\n", " \\right)^\\top\n", " {\\boldsymbol{\\Sigma}'}^{-1}\n", " \\left(\n", " \\boldsymbol{\\mu}' + \\boldsymbol{\\Psi}' \\mathbf{w}_t -\n", " \\boldsymbol{\\mu}_{t - 1}\n", " \\right)\\\\\n", " &= \\left(\n", " \\boldsymbol{\\mu}' + \\boldsymbol{\\Psi}' \\mathbf{w}_t\n", " \\right)^\\top\n", " {\\boldsymbol{\\Sigma}'}^{-1}\n", " \\left(\n", " \\boldsymbol{\\mu}' + \\boldsymbol{\\Psi}' \\mathbf{w}_t\n", " \\right) -\n", " \\boldsymbol{\\mu}_{t - 1}^\\top\n", " {\\boldsymbol{\\Sigma}'}^{-1}\n", " \\left(\n", " 2 \\boldsymbol{\\mu}' +\n", " 2 \\boldsymbol{\\Psi}' \\mathbf{w}_t -\n", " \\boldsymbol{\\mu}_{t - 1}\n", " \\right)\\\\\n", " &= \\left(\n", " \\boldsymbol{\\mu}' + \\boldsymbol{\\Psi}' \\mathbf{w}_t\n", " \\right)^\\top\n", " {\\boldsymbol{\\Sigma}'}^{-1}\n", " \\left(\n", " \\boldsymbol{\\mu}' + \\boldsymbol{\\Psi}' \\mathbf{w}_t\n", " \\right) -\n", " \\boldsymbol{\\mu}_{t - 1}^\\top \\boldsymbol{\\Psi}^\\top\n", " \\boldsymbol{\\Sigma}_p^{-1}\n", " \\left(\n", " 2 \\mathbf{w}_t -\n", " 2 \\boldsymbol{\\mu}_p -\n", " \\boldsymbol{\\Psi} \\boldsymbol{\\mu}_{t - 1}\n", " \\right)\n", "\n", "and\n", "\n", "..
math::\n", "\n", " & (\\mathbf{w}_t - \\boldsymbol{\\mu}_p)^\\top\n", " \\boldsymbol{\\Sigma}_p^{-1}\n", " (\\mathbf{w}_t - \\boldsymbol{\\mu}_p) -\n", " \\boldsymbol{\\mu}_{t - 1}^\\top \\boldsymbol{\\Psi}^\\top\n", " \\boldsymbol{\\Sigma}_p^{-1}\n", " \\left(\n", " 2 \\mathbf{w}_t -\n", " 2 \\boldsymbol{\\mu}_p -\n", " \\boldsymbol{\\Psi} \\boldsymbol{\\mu}_{t - 1}\n", " \\right)\\\\\n", " &= \\left(\n", " \\mathbf{w}_t - \\boldsymbol{\\mu}_p -\n", " \\boldsymbol{\\Psi} \\boldsymbol{\\mu}_{t - 1}\n", " \\right)^\\top\n", " \\boldsymbol{\\Sigma}_p^{-1}\n", " \\left(\n", " \\mathbf{w}_t - \\boldsymbol{\\mu}_p -\n", " \\boldsymbol{\\Psi} \\boldsymbol{\\mu}_{t - 1}\n", " \\right),\n", "\n", "the sum of the original summands is\n", "\n", ".. math::\n", "\n", " \\left(\n", " \\mathbf{w}_t -\n", " \\boldsymbol{\\mu}_p - \\boldsymbol{\\Psi} \\boldsymbol{\\mu}_{t - 1}\n", " \\right)^\\top\n", " \\left(\n", " \\boldsymbol{\\Sigma}_p^{-1} -\n", " \\boldsymbol{\\Sigma}_p^{-1} \\boldsymbol{\\Psi}\n", " \\left(\n", " \\boldsymbol{\\Sigma}_{t - 1}^{-1} +\n", " \\boldsymbol{\\Psi}^\\top\n", " \\boldsymbol{\\Sigma}_p^{-1}\n", " \\boldsymbol{\\Psi}\n", " \\right)^{-1}\n", " \\boldsymbol{\\Psi}^\\top \\boldsymbol{\\Sigma}_p^{-1}\n", " \\right)\n", " \\left(\n", " \\mathbf{w}_t -\n", " \\boldsymbol{\\mu}_p - \\boldsymbol{\\Psi} \\boldsymbol{\\mu}_{t - 1}\n", " \\right).\n", "\n", "(c.4)\n", "-----\n", "\n", "See :ref:`Exercise 5.9 ` for more details.\n", "\n", "..
math::\n", "\n", " \\left( \\boldsymbol{\\Sigma}' + \\boldsymbol{\\Sigma}_{t - 1} \\right)^{-1}\n", " &= {\\boldsymbol{\\Sigma}'}^{-1} -\n", " \\left( \\boldsymbol{\\Sigma}' + \\boldsymbol{\\Sigma}_{t - 1} \\right)^{-1}\n", " \\boldsymbol{\\Sigma}_{t - 1} {\\boldsymbol{\\Sigma}'}^{-1}\\\\\n", " &= {\\boldsymbol{\\Sigma}'}^{-1} -\n", " {\\boldsymbol{\\Sigma}'}^{-1}\n", " \\left(\n", " \\mathbf{I} + \\boldsymbol{\\Sigma}_{t - 1} {\\boldsymbol{\\Sigma}'}^{-1}\n", " \\right)^{-1}\n", " \\boldsymbol{\\Sigma}_{t - 1} {\\boldsymbol{\\Sigma}'}^{-1}\\\\\n", " &= {\\boldsymbol{\\Sigma}'}^{-1} -\n", " {\\boldsymbol{\\Sigma}'}^{-1}\n", " \\left[\n", " \\boldsymbol{\\Sigma}_{t - 1}\n", " \\left(\n", " \\boldsymbol{\\Sigma}_{t - 1}^{-1} +\n", " \\boldsymbol{\\Psi}^\\top \\boldsymbol{\\Sigma}_p^{-1} \\boldsymbol{\\Psi}\n", " \\right)\n", " \\right]^{-1}\n", " \\boldsymbol{\\Sigma}_{t - 1} {\\boldsymbol{\\Sigma}'}^{-1}\\\\\n", " &= {\\boldsymbol{\\Sigma}'}^{-1} -\n", " {\\boldsymbol{\\Sigma}'}^{-1}\n", " \\left(\n", " \\boldsymbol{\\Sigma}_{t - 1}^{-1} +\n", " \\boldsymbol{\\Psi}^\\top \\boldsymbol{\\Sigma}_p^{-1} \\boldsymbol{\\Psi}\n", " \\right)^{-1}\n", " {\\boldsymbol{\\Sigma}'}^{-1}" ] }, { "cell_type": "raw", "metadata": { "raw_mimetype": "text/restructuredtext" }, "source": [ ".. _prince2012computer-ex-19.2:\n", "\n", "Exercise 19.2\n", "=============\n", "\n", ".. 
math::\n", "\n", " Pr(\\mathbf{w}_t \\mid \\mathbf{x}_{1 \\ldots t})\n", " &= \\frac{\n", " Pr(\\mathbf{w}_t, \\mathbf{x}_{1 \\ldots t})\n", " }{\n", " Pr(\\mathbf{x}_{1 \\ldots t})\n", " }\\\\\n", " &= \\frac{\n", " Pr(\\mathbf{x}_t \\mid \\mathbf{w}_t)\n", " Pr(\\mathbf{w}_t \\mid \\mathbf{x}_{1 \\ldots t - 1})\n", " Pr(\\mathbf{x}_{1 \\ldots t - 1})\n", " }{\n", " Pr(\\mathbf{x}_t \\mid \\mathbf{x}_{1 \\ldots t - 1})\n", " Pr(\\mathbf{x}_{1 \\ldots t - 1})\n", " }\\\\\n", " &= \\frac{\n", " Pr(\\mathbf{x}_t \\mid \\mathbf{w}_t)\n", " Pr(\\mathbf{w}_t \\mid \\mathbf{x}_{1 \\ldots t - 1})\n", " }{\n", " \\int Pr(\\mathbf{x}_t, \\mathbf{w}_t \\mid \\mathbf{x}_{1 \\ldots t - 1})\n", " d\\mathbf{w}_t\n", " }\\\\\n", " &= \\frac{\n", " \\NormDist_{\\mathbf{x}_t}\\left[\n", " \\boldsymbol{\\mu}_m + \\boldsymbol{\\Phi} \\mathbf{w}_t,\n", " \\boldsymbol{\\Sigma}_m\n", " \\right]\n", " \\NormDist_{\\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}_+,\n", " \\boldsymbol{\\Sigma}_+\n", " \\right]\n", " }{\n", " \\int Pr(\\mathbf{x}_t \\mid \\mathbf{w}_t)\n", " Pr(\\mathbf{w}_t \\mid \\mathbf{x}_{1 \\ldots t - 1}) d\\mathbf{w}_t\n", " }\n", " & \\quad & \\text{(19.8), (19.9)}\\\\\n", " &= \\frac{\n", " \\kappa_1 \\kappa_2 \\NormDist_{\\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}_t,\n", " \\boldsymbol{\\Sigma}_t\n", " \\right]\n", " }{\n", " \\int \\kappa_1 \\kappa_2\n", " \\NormDist_{\\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}_t, \\boldsymbol{\\Sigma}_t\n", " \\right] d\\mathbf{w}_t\n", " }\n", " & \\quad & \\text{(a), (b)}\\\\\n", " &= \\NormDist_{\\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}_t,\n", " \\boldsymbol{\\Sigma}_t\n", " \\right]\n", "\n", "(a)\n", "---\n", "\n", "Suppose :math:`\\mathbf{x}_\\cdot \\in \\mathbb{R}^n` and\n", ":math:`\\mathbf{w}_\\cdot \\in \\mathbb{R}^m`. By\n", ":ref:`Exercise 5.10 `,\n", "\n", ".. 
math::\n", "\n", " \\NormDist_{\\mathbf{x}_t}\\left[\n", " \\boldsymbol{\\mu}_m + \\boldsymbol{\\Phi} \\mathbf{w}_t,\n", " \\boldsymbol{\\Sigma}_m\n", " \\right] =\n", " \\kappa_1 \\NormDist_{\\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}' + \\boldsymbol{\\Phi}' \\mathbf{x}_t,\n", " \\boldsymbol{\\Sigma}'\n", " \\right]\n", "\n", "where\n", "\n", ".. math::\n", "\n", " \\boldsymbol{\\Sigma}'\n", " &= \\left(\n", " \\boldsymbol{\\Phi}^\\top \\boldsymbol{\\Sigma}_m^{-1} \\boldsymbol{\\Phi}\n", " \\right)^{-1}\n", " \\\\\\\\\n", " \\boldsymbol{\\Phi}'\n", " &= \\boldsymbol{\\Sigma}' \\boldsymbol{\\Phi}^\\top \\boldsymbol{\\Sigma}_m^{-1}\n", " \\\\\\\\\n", " \\boldsymbol{\\mu}'\n", " &= -\\boldsymbol{\\Sigma}' \\boldsymbol{\\Phi}^\\top \\boldsymbol{\\Sigma}_m^{-1}\n", " \\boldsymbol{\\mu}_m\n", " \\\\\\\\\n", " \\kappa_1\n", " &= (2 \\pi)^{(m - n) / 2}\n", " \\frac{\n", " \\left\\vert \\boldsymbol{\\Sigma}' \\right\\vert^{1 / 2}\n", " }{\n", " \\left\\vert \\boldsymbol{\\Sigma}_m \\right\\vert^{1 / 2}\n", " }\n", " \\exp\\left[\n", " -0.5\n", " (\\mathbf{x}_t - \\boldsymbol{\\mu}_m)^\\top\n", " \\left(\n", " \\boldsymbol{\\Sigma}_m^{-1} -\n", " \\boldsymbol{\\Sigma}_m^{-1} \\boldsymbol{\\Phi} \\boldsymbol{\\Sigma}'\n", " \\boldsymbol{\\Phi}^\\top \\boldsymbol{\\Sigma}_m^{-1}\n", " \\right)\n", " (\\mathbf{x}_t - \\boldsymbol{\\mu}_m)\n", " \\right].\n", "\n", "(b)\n", "---\n", "\n", "By :ref:`Exercise 5.7 ` and\n", ":ref:`Exercise 5.9 `,\n", "\n", ".. math::\n", "\n", " \\kappa_1 \\NormDist_{\\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}' + \\boldsymbol{\\Phi}' \\mathbf{x}_t,\n", " \\boldsymbol{\\Sigma}'\n", " \\right]\n", " \\NormDist_{\\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}_+, \\boldsymbol{\\Sigma}_+\n", " \\right] =\n", " \\kappa_1 \\kappa_2 \\NormDist_{\\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}_t, \\boldsymbol{\\Sigma}_t\n", " \\right]\n", "\n", "where\n", "\n", ".. 
math::\n", "\n", " \\boldsymbol{\\Sigma}_t\n", " &= \\left(\n", " {\\boldsymbol{\\Sigma}'}^{-1} + \\boldsymbol{\\Sigma}_+^{-1}\n", " \\right)^{-1}\n", " = \\left(\n", " \\boldsymbol{\\Phi}^\\top \\boldsymbol{\\Sigma}_m^{-1} \\boldsymbol{\\Phi} +\n", " \\boldsymbol{\\Sigma}_+^{-1}\n", " \\right)^{-1}\n", " \\\\\\\\\n", " \\boldsymbol{\\mu}_t\n", " &= \\boldsymbol{\\Sigma}_t\n", " \\left(\n", " {\\boldsymbol{\\Sigma}'}^{-1} \\left(\n", " \\boldsymbol{\\mu}' + \\boldsymbol{\\Phi}' \\mathbf{x}_t\n", " \\right) +\n", " \\boldsymbol{\\Sigma}_+^{-1} \\boldsymbol{\\mu}_+\n", " \\right)\n", " = \\boldsymbol{\\Sigma}_t\n", " \\left(\n", " \\boldsymbol{\\Phi}^\\top \\boldsymbol{\\Sigma}_m^{-1} \\left(\n", " \\mathbf{x}_t - \\boldsymbol{\\mu}_m\n", " \\right) +\n", " \\boldsymbol{\\Sigma}_+^{-1} \\boldsymbol{\\mu}_+\n", " \\right)\n", " \\\\\\\\\n", " \\kappa_2\n", " &= \\NormDist_{\\boldsymbol{\\mu}' + \\boldsymbol{\\Phi}' \\mathbf{x}_t}\\left[\n", " \\boldsymbol{\\mu}_+,\n", " \\boldsymbol{\\Sigma}' + \\boldsymbol{\\Sigma}_+\n", " \\right]." ] }, { "cell_type": "raw", "metadata": { "raw_mimetype": "text/restructuredtext" }, "source": [ "Exercise 19.3\n", "=============\n", "\n", ".. 
math::\n", "\n", " Pr(\\mathbf{w}_t \\mid \\mathbf{x}_{1 \\ldots t})\n", " &= \\frac{\n", " \\NormDist_{\\mathbf{x}_t}\\left[\n", " \\boldsymbol{\\mu}_m + \\boldsymbol{\\Phi} \\mathbf{w}_t,\n", " \\boldsymbol{\\Sigma}_m\n", " \\right]\n", " \\sum_{k = 1}^K \\lambda_k\n", " \\NormDist_{\\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}_{+k},\n", " \\boldsymbol{\\Sigma}_{+k}\n", " \\right]\n", " }{\n", " \\int Pr(\\mathbf{x}_t \\mid \\mathbf{w}_t)\n", " Pr(\\mathbf{w}_t \\mid \\mathbf{x}_{1 \\ldots t - 1})\n", " d\\mathbf{w}_t\n", " }\n", " & \\quad & \\text{(19.8) and Exercise 19.2}\\\\\n", " &= \\frac{\n", " \\kappa \\sum_{k = 1}^K \\kappa_k \\lambda_k \\NormDist_{\\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}_{tk},\n", " \\boldsymbol{\\Sigma}_{tk}\n", " \\right]\n", " }{\n", " \\kappa \\sum_{k = 1}^K \\kappa_{k} \\lambda_k\n", " }\n", " & \\quad & \\text{(a), (b)}\\\\\n", " &= \\sum_{k = 1}^K \\lambda'_k \\NormDist_{\\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}_{tk},\n", " \\boldsymbol{\\Sigma}_{tk}\n", " \\right]\n", " & \\quad & \\lambda'_k =\n", " \\frac{\n", " \\kappa_k \\lambda_k\n", " }{\n", " \\sum_{k' = 1}^K \\kappa_{k'} \\lambda_{k'}\n", " }.\n", "\n", "See :ref:`Exercise 19.2 ` for more details.\n", "\n", "In the next time update step, the prediction becomes\n", "\n", ".. 
math::\n", "\n", " Pr(\\mathbf{w}_{t + 1} \\mid \\mathbf{x}_{1 \\ldots t})\n", " &= \\int Pr(\\mathbf{w}_{t + 1}, \\mathbf{w}_t \\mid \\mathbf{x}_{1 \\ldots t})\n", " d\\mathbf{w}_t\n", " & \\quad & \\text{(2.1)}\\\\\n", " &= \\int Pr(\\mathbf{w}_{t + 1} \\mid \\mathbf{w}_t)\n", " Pr(\\mathbf{w}_t \\mid \\mathbf{x}_{1 \\ldots t}) d\\mathbf{w}_t\n", " & \\quad & \\text{Markov assumption}\\\\\n", " &= \\int \\NormDist_{\\mathbf{w}_{t + 1}}\\left[\n", " \\boldsymbol{\\mu}_p + \\boldsymbol{\\Psi} \\mathbf{w}_t,\n", " \\boldsymbol{\\Sigma}_p\n", " \\right]\n", " \\sum_{k = 1}^K \\lambda'_k\n", " \\NormDist_{\\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}_{tk}, \\boldsymbol{\\Sigma}_{tk}\n", " \\right] d\\mathbf{w}_t\n", " & \\quad & \\text{(19.6) and Exercise 19.1}\\\\\n", " &= \\sum_{k = 1}^K \\lambda'_k \\int\n", " \\NormDist_{\\mathbf{w}_{t + 1}}\\left[\n", " \\boldsymbol{\\mu}_p + \\boldsymbol{\\Psi} \\mathbf{w}_t,\n", " \\boldsymbol{\\Sigma}_p\n", " \\right]\n", " \\NormDist_{\\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}_{tk}, \\boldsymbol{\\Sigma}_{tk}\n", " \\right] d\\mathbf{w}_t\n", " & \\quad & \\text{sum rule in integration}\\\\\n", " &= \\sum_{k = 1}^K \\lambda'_k\n", " \\NormDist_{\\mathbf{w}_{t + 1}}\\left[\n", " \\boldsymbol{\\mu}_p + \\boldsymbol{\\Psi} \\boldsymbol{\\mu}_{tk},\n", " \\boldsymbol{\\Sigma}_p +\n", " \\boldsymbol{\\Psi} \\boldsymbol{\\Sigma}_{tk} \\boldsymbol{\\Psi}^\\top\n", " \\right]\n", " & \\quad & \\text{(c) from Exercise 19.1}\\\\\n", " &= \\sum_{k = 1}^K \\lambda'_k\n", " \\NormDist_{\\mathbf{w}_{t + 1}}\\left[\n", " \\boldsymbol{\\mu}_{+k}, \\boldsymbol{\\Sigma}_{+k}\n", " \\right].\n", "\n", "See :ref:`Exercise 19.1 <prince2012computer-ex-19.1>` for more details.\n", "\n", "(a)\n", "---\n", "\n", "By (a) and (b) from :ref:`Exercise 19.2 <prince2012computer-ex-19.2>`,\n", "\n", "..
math::\n", "\n", " \\kappa \\NormDist_{\\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}' + \\boldsymbol{\\Phi}' \\mathbf{x}_t,\n", " \\boldsymbol{\\Sigma}'\n", " \\right]\n", " \\NormDist_{\\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}_{+k}, \\boldsymbol{\\Sigma}_{+k}\n", " \\right] =\n", " \\kappa \\kappa_{k} \\NormDist_{\\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}_{tk}, \\boldsymbol{\\Sigma}_{tk}\n", " \\right]\n", "\n", "where\n", "\n", ".. math::\n", "\n", " \\boldsymbol{\\Sigma}_{tk}\n", " &= \\left(\n", " {\\boldsymbol{\\Sigma}'}^{-1} +\n", " \\boldsymbol{\\Sigma}_{+k}^{-1}\n", " \\right)^{-1}\n", " = \\left(\n", " \\boldsymbol{\\Phi}^\\top \\boldsymbol{\\Sigma}_m^{-1} \\boldsymbol{\\Phi} +\n", " \\boldsymbol{\\Sigma}_{+k}^{-1}\n", " \\right)^{-1}\n", " \\\\\\\\\n", " \\boldsymbol{\\mu}_{tk}\n", " &= \\boldsymbol{\\Sigma}_{tk}\n", " \\left(\n", " {\\boldsymbol{\\Sigma}'}^{-1} \\left(\n", " \\boldsymbol{\\mu}' + \\boldsymbol{\\Phi}' \\mathbf{x}_t\n", " \\right) +\n", " \\boldsymbol{\\Sigma}_{+k}^{-1} \\boldsymbol{\\mu}_{+k}\n", " \\right)\n", " = \\boldsymbol{\\Sigma}_{tk}\n", " \\left(\n", " \\boldsymbol{\\Phi}^\\top \\boldsymbol{\\Sigma}_m^{-1} \\left(\n", " \\mathbf{x}_t - \\boldsymbol{\\mu}_m\n", " \\right) +\n", " \\boldsymbol{\\Sigma}_{+k}^{-1} \\boldsymbol{\\mu}_{+k}\n", " \\right)\n", " \\\\\\\\\n", " \\kappa_{k}\n", " &= \\NormDist_{\\boldsymbol{\\mu}' + \\boldsymbol{\\Phi}' \\mathbf{x}_t}\\left[\n", " \\boldsymbol{\\mu}_{+k},\n", " \\boldsymbol{\\Sigma}' + \\boldsymbol{\\Sigma}_{+k}\n", " \\right].\n", "\n", "(b)\n", "---\n", "\n", "..
math::\n", "\n", " \\int \\kappa \\sum_{k = 1}^K \\kappa_{k} \\lambda_k\n", " \\NormDist_{\\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}_{tk}, \\boldsymbol{\\Sigma}_{tk}\n", " \\right] d\\mathbf{w}_t\n", " &= \\kappa \\sum_{k = 1}^K \\int \\kappa_{k} \\lambda_k\n", " \\NormDist_{\\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}_{tk}, \\boldsymbol{\\Sigma}_{tk}\n", " \\right] d\\mathbf{w}_t\n", " & \\quad & \\text{sum rule in integration}\\\\\n", " &= \\kappa \\sum_{k = 1}^K \\kappa_{k} \\lambda_k\n", " & \\quad & \\text{each Gaussian integrates to one}" ] }, { "cell_type": "raw", "metadata": { "raw_mimetype": "text/restructuredtext" }, "source": [ "Exercise 19.4\n", "=============\n", "\n", "The max-marginals inference is essentially (10.16):\n", "\n", ".. math::\n", "\n", " \\DeclareMathOperator*{\\argmax}{arg\\,max}\n", " \\hat{\\mathbf{w}} =\n", " \\argmax_{\\mathbf{w}_t} Pr(\\mathbf{w}_t \\mid \\mathbf{w}_{t - 1}).\n", "\n", "The temporal model could still be (19.5) where\n", ":math:`\\boldsymbol{\\Psi} = \\boldsymbol{\\Psi}_1` or\n", ":math:`\\boldsymbol{\\Psi} = \\boldsymbol{\\Psi}_2`.\n", "\n", "A simple strategy could be to choose the state transition matrix that maximizes\n", "this probability at the current time step :cite:`ghahramani2000variational`." ] }, { "cell_type": "raw", "metadata": { "raw_mimetype": "text/restructuredtext" }, "source": [ "Exercise 19.5\n", "=============\n", "\n", "The joint posterior distribution can be factorized into an HMM (11.1), which\n", "can be solved in :math:`\\mathcal{O}(TK^2)` using the Viterbi algorithm where\n", ":math:`K` is the number of possible states (see\n", ":ref:`Exercise 11.2 ` for more details).\n", "\n", "In the Kalman filter, :math:`T` grows as more measurements are taken, so\n", "computing the marginal posteriors is preferred because it can be solved for in\n", "closed form."
] }, { "cell_type": "raw", "metadata": { "raw_mimetype": "text/restructuredtext" }, "source": [ "Exercise 19.6\n", "=============\n", "\n", "The following are based on Section 11.4.4 and :cite:`schonborncs351gmspa`.\n", "\n", "The forward pass starts with\n", "\n", ".. math::\n", "\n", " \\mathbf{m}_{\\mathbf{x}_1 \\rightarrow g_1} =\n", " \\delta[\\mathbf{x}_1^*]\n", " \\qquad \\text{(11.36).}\n", "\n", "The message is then forwarded as\n", "\n", ".. math::\n", "\n", " \\mathbf{m}_{g_1 \\rightarrow \\mathbf{w}_1}\n", " &= \\int Pr(\\mathbf{x}_1 \\mid \\mathbf{w}_1)\n", " \\delta\\left[ \\mathbf{x}_1^* \\right] d\\mathbf{x}_1\\\\\n", " &= Pr(\\mathbf{x}_1 = \\mathbf{x}_1^* \\mid \\mathbf{w}_1)\n", " & \\quad & \\text{(11.37).}\n", "\n", "Generalizing the message yields the measurement model\n", "\n", ".. math::\n", "\n", " \\mathbf{m}_{g_t \\rightarrow \\mathbf{w}_t} =\n", " Pr(\\mathbf{x}_t = \\mathbf{x}_t^* \\mid \\mathbf{w}_t)\n", " \\qquad \\text{(19.8).}\n", "\n", "At time step :math:`t = 1`, the result is arbitrary as suggested in the\n", "paragraph after (19.16) where\n", "\n", ".. math::\n", "\n", " Pr(\\mathbf{x}_t = \\mathbf{x}_t^* \\mid \\mathbf{w}_t)\n", " &= \\frac{\n", " Pr(\\mathbf{x}_t = \\mathbf{x}_t^*, \\mathbf{w}_t)\n", " }{\n", " Pr(\\mathbf{w}_t)\n", " }\\\\\n", " &= \\frac{\n", " Pr(\\mathbf{w}_t \\mid \\mathbf{x}_t = \\mathbf{x}_t^*)\n", " Pr(\\mathbf{x}_t = \\mathbf{x}_t^*)\n", " }{\n", " Pr(\\mathbf{w}_t)\n", " }\\\\\n", " Pr(\\mathbf{w}_t \\mid \\mathbf{x}_t = \\mathbf{x}_t^*)\n", " &= \\frac{\n", " Pr(\\mathbf{x}_t = \\mathbf{x}_t^* \\mid \\mathbf{w}_t) Pr(\\mathbf{w}_t)\n", " }{\n", " Pr(\\mathbf{x}_t = \\mathbf{x}_t^*)\n", " }\n", " & \\quad & \\text{(19.1).}\n", "\n", "This means the first hidden variable adds prior information and forwards the\n", "message normalized as\n", "\n", ".. 
math::\n", "\n", " \\mathbf{m}_{\\mathbf{w}_1 \\rightarrow g_{12}}\n", " &= \\mathbf{m}_{g_1 \\rightarrow \\mathbf{w}_1}\n", " \\frac{Pr(\\mathbf{w}_1)}{Pr(\\mathbf{x}_1 = \\mathbf{x}_1^*)}\\\\\n", " &= \\frac{\n", " Pr(\\mathbf{x}_1 = \\mathbf{x}_1^* \\mid \\mathbf{w}_1) Pr(\\mathbf{w}_1)\n", " }{\n", " Pr(\\mathbf{x}_1 = \\mathbf{x}_1^*)\n", " }\\\\\n", " &= Pr(\\mathbf{w}_1 \\mid \\mathbf{x}_1 = \\mathbf{x}_1^*)\n", " & \\quad & \\text{(11.35).}\n", "\n", "Generalizing what the function node (at :math:`t > 1`) forwards yields the\n", "prediction step\n", "\n", ".. math::\n", "\n", " \\mathbf{m}_{g_{t - 1, t} \\rightarrow \\mathbf{w}_t}\n", " &= \\int Pr(\\mathbf{w}_t \\mid \\mathbf{w}_{t - 1})\n", " Pr(\\mathbf{w}_{t - 1} \\mid \\mathbf{x}_{1 \\ldots t - 1})\n", " d\\mathbf{w}_{t - 1}\\\\\n", " &= Pr(\\mathbf{w}_t \\mid \\mathbf{x}_{1 \\ldots t - 1})\n", " & \\quad & \\text{(11.37), (19.9).}\n", "\n", "Generalizing what the unobserved variable (at :math:`t > 1`) forwards yields the\n", "measurement incorporation step\n", "\n", ".. math::\n", "\n", " \\mathbf{m}_{\\mathbf{w}_t \\rightarrow g_{t, t + 1}}\n", " &= \\frac{\n", " \\mathbf{m}_{g_{t} \\rightarrow \\mathbf{w}_t}\n", " \\mathbf{m}_{g_{t - 1, t} \\rightarrow \\mathbf{w}_t}\n", " }{\n", " Pr(\\mathbf{x}_{1 \\ldots t})\n", " }\\\\\n", " &= \\frac{\n", " Pr(\\mathbf{x}_t = \\mathbf{x}_t^* \\mid \\mathbf{w}_t)\n", " Pr(\\mathbf{w}_t \\mid \\mathbf{x}_{1 \\ldots t - 1})\n", " }{\n", " Pr(\\mathbf{x}_{1 \\ldots t})\n", " }\\\\\n", " &= Pr(\\mathbf{w}_t \\mid \\mathbf{x}_{1 \\ldots t})\n", " & \\quad & \\text{(11.35), (19.10).}\n", "\n", "Notice that the backward pass is not needed because the forward pass propagates\n", "normalized messages." ] }, { "cell_type": "raw", "metadata": { "raw_mimetype": "text/restructuredtext" }, "source": [ ".. 
_prince2012computer-ex-19.7:\n", "\n", "Exercise 19.7\n", "=============\n", "\n", "By inspection, the fixed interval smoother occurs after the Kalman filter i.e.\n", "wait until :math:`T` observations have been made and then retrospectively\n", "calculate :math:`Pr(\\mathbf{w}_t \\mid \\mathbf{x}_{1 \\ldots T})` for\n", ":math:`t < T`.\n", "\n", "The base case of this inductive proof is\n", "\n", ".. math::\n", "\n", " Pr(\\mathbf{w}_T \\mid \\mathbf{x}_{1 \\ldots T})\n", " &= \\frac{\n", " Pr(\\mathbf{x}_T \\mid \\mathbf{w}_T)\n", " Pr(\\mathbf{w}_T \\mid \\mathbf{x}_{1 \\ldots T - 1})\n", " }{\n", " Pr(\\mathbf{x}_{1 \\ldots T})\n", " }\\\\\n", " &= \\NormDist_{\\mathbf{w}_T}\\left[\n", " \\boldsymbol{\\mu}_{T \\mid T}, \\boldsymbol{\\Sigma}_{T \\mid T}\n", " \\right]\n", " & \\quad & \\text{(19.10).}\n", "\n", "Insights from :cite:`fletcher2010kalman` suggest that the D-separation should be\n", "invoked. The inductive step is then\n", "\n", ".. math::\n", "\n", " Pr(\\mathbf{w}_t \\mid \\mathbf{x}_{1 \\ldots T})\n", " &= \\int Pr(\\mathbf{w}_{t + 1}, \\mathbf{w}_t \\mid \\mathbf{x}_{1 \\ldots T})\n", " d\\mathbf{w}_{t + 1}\n", " & \\quad & \\text{(2.1)}\\\\\n", " &= \\int Pr(\\mathbf{w}_t \\mid \\mathbf{w}_{t + 1}, \\mathbf{x}_{1 \\ldots T})\n", " Pr(\\mathbf{w}_{t + 1} \\mid \\mathbf{x}_{1 \\ldots T})\n", " d\\mathbf{w}_{t + 1}\n", " & \\quad & \\text{(2.6) with Markov assumption}\\\\\n", " &= \\int Pr(\\mathbf{w}_t \\mid \\mathbf{w}_{t + 1}, \\mathbf{x}_{1 \\ldots t})\n", " Pr(\\mathbf{w}_{t + 1} \\mid \\mathbf{x}_{1 \\ldots T})\n", " d\\mathbf{w}_{t + 1}\n", " & \\quad & \\text{D-separation}\\\\\n", " &= \\int\n", " \\NormDist_{\\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}'_{t + 1}, \\boldsymbol{\\Sigma}'_{t + 1}\n", " \\right]\n", " \\NormDist_{\\mathbf{w}_{t + 1}}\\left[\n", " \\boldsymbol{\\mu}_{t + 1 \\mid T}, \\boldsymbol{\\Sigma}_{t + 1 \\mid T}\n", " \\right]\n", " d\\mathbf{w}_{t + 1}\n", " & \\quad & \\text{(a)}\\\\\n", " &= 
\\NormDist_{\\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}_{t \\mid T},\n", " \\boldsymbol{\\Sigma}_{t \\mid T}\n", " \\right]\n", " & \\quad & \\text{(b).}\n", "\n", "(a)\n", "---\n", "\n", ".. math::\n", "\n", " Pr(\\mathbf{w}_t \\mid \\mathbf{w}_{t + 1}, \\mathbf{x}_{1 \\ldots t})\n", " &= \\frac{\n", " Pr(\\mathbf{w}_t, \\mathbf{w}_{t + 1}, \\mathbf{x}_{1 \\ldots t})\n", " }{\n", " Pr(\\mathbf{w}_{t + 1}, \\mathbf{x}_{1 \\ldots t})\n", " }\n", " & \\quad & \\text{(2.4)}\\\\\n", " &= \\frac{\n", " Pr(\\mathbf{w}_{t + 1} \\mid \\mathbf{w}_t)\n", " Pr(\\mathbf{w}_t \\mid \\mathbf{x}_{1 \\ldots t})\n", " Pr(\\mathbf{x}_{1 \\ldots t})\n", " }{\n", " Pr(\\mathbf{w}_{t + 1} \\mid \\mathbf{x}_{1 \\ldots t})\n", " Pr(\\mathbf{x}_{1 \\ldots t})\n", " }\n", " & \\quad & \\text{(2.5)}\\\\\n", " &= \\frac{\n", " Pr(\\mathbf{w}_{t + 1} \\mid \\mathbf{w}_t)\n", " Pr(\\mathbf{w}_t \\mid \\mathbf{x}_{1 \\ldots t})\n", " }{\n", " \\int Pr(\\mathbf{w}_t, \\mathbf{w}_{t + 1} \\mid \\mathbf{x}_{1 \\ldots t})\n", " d\\mathbf{w}_{t}\n", " }\n", " & \\quad & \\text{(2.1)}\\\\\n", " &= \\frac{\n", " Pr(\\mathbf{w}_{t + 1} \\mid \\mathbf{w}_t)\n", " Pr(\\mathbf{w}_t \\mid \\mathbf{x}_{1 \\ldots t})\n", " }{\n", " \\int Pr(\\mathbf{w}_{t + 1} \\mid \\mathbf{w}_t)\n", " Pr(\\mathbf{w}_t \\mid \\mathbf{x}_{1 \\ldots t}) d\\mathbf{w}_{t}\n", " }\n", " & \\quad & \\text{(2.6) with Markov assumption}\\\\\n", " &= \\NormDist_{\\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}'_{t + 1}, \\boldsymbol{\\Sigma}'_{t + 1}\n", " \\right]\n", " & \\quad & \\text{(a.1)}\n", "\n", "(a.1)\n", "-----\n", "\n", ".. 
math::\n", "\n", " Pr(\\mathbf{w}_{t + 1} \\mid \\mathbf{w}_t)\n", " Pr(\\mathbf{w}_t \\mid \\mathbf{x}_{1 \\ldots t})\n", " &= \\NormDist_{\\mathbf{w}_{t + 1}}\\left[\n", " \\boldsymbol{\\mu}_p + \\boldsymbol{\\Psi} \\mathbf{w}_t,\n", " \\boldsymbol{\\Sigma}_p\n", " \\right]\n", " \\NormDist_{\\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}_t, \\boldsymbol{\\Sigma}_t\n", " \\right]\n", " & \\quad & \\text{(19.6), (19.10)}\\\\\n", " &= \\kappa_1 \\NormDist_{\\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}' + \\boldsymbol{\\Psi}' \\mathbf{w}_{t + 1},\n", " \\boldsymbol{\\Sigma}'\n", " \\right]\n", " \\NormDist_{\\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}_t, \\boldsymbol{\\Sigma}_t\n", " \\right]\n", " & \\quad & \\text{(a.2)}\\\\\n", " &= \\kappa_1 \\kappa_2 \\NormDist_{\\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}'_{t + 1}, \\boldsymbol{\\Sigma}'_{t + 1}\n", " \\right]\n", " & \\quad & \\text{Exercise 5.7 and 5.9}\n", "\n", "where\n", "\n", ".. math::\n", "\n", " \\boldsymbol{\\Sigma}'_{t + 1}\n", " &= \\left(\n", " \\boldsymbol{\\Sigma}'^{-1} + \\boldsymbol{\\Sigma}_t^{-1}\n", " \\right)^{-1}\\\\\n", " &= \\left(\n", " \\boldsymbol{\\Psi}^\\top \\boldsymbol{\\Sigma}_p^{-1} \\boldsymbol{\\Psi} +\n", " \\boldsymbol{\\Sigma}_t^{-1}\n", " \\right)^{-1}\n", " \\\\\\\\\n", " \\boldsymbol{\\mu}'_{t + 1}\n", " &= \\boldsymbol{\\Sigma}'_{t + 1}\n", " \\left(\n", " \\boldsymbol{\\Sigma}'^{-1}\n", " \\left(\n", " \\boldsymbol{\\mu}' + \\boldsymbol{\\Psi}' \\mathbf{w}_{t + 1}\n", " \\right) +\n", " \\boldsymbol{\\Sigma}_t^{-1} \\boldsymbol{\\mu}_t\n", " \\right)\\\\\n", " &= \\boldsymbol{\\Sigma}'_{t + 1}\n", " \\left(\n", " \\boldsymbol{\\Psi}^\\top \\boldsymbol{\\Sigma}_p^{-1}\n", " \\left(\n", " \\mathbf{w}_{t + 1} - \\boldsymbol{\\mu}_p\n", " \\right) +\n", " \\boldsymbol{\\Sigma}_t^{-1} \\boldsymbol{\\mu}_t\n", " \\right)\\\\\n", " &= \\boldsymbol{\\Sigma}'_{t + 1} \\boldsymbol{\\Psi}^\\top\n", " \\boldsymbol{\\Sigma}_p^{-1} \\mathbf{w}_{t + 1} -\n", " 
\\boldsymbol{\\Sigma}'_{t + 1} \\boldsymbol{\\Psi}^\\top\n", " \\boldsymbol{\\Sigma}_p^{-1} \\boldsymbol{\\mu}_p +\n", " \\boldsymbol{\\Sigma}'_{t + 1} \\boldsymbol{\\Sigma}_t^{-1}\n", " \\boldsymbol{\\mu}_t\n", " \\\\\\\\\n", " \\kappa_2\n", " &= \\NormDist_{\\boldsymbol{\\mu}' + \\boldsymbol{\\Psi}' \\mathbf{w}_{t + 1}}\n", " \\left[\n", " \\boldsymbol{\\mu}_t,\n", " \\boldsymbol{\\Sigma}' + \\boldsymbol{\\Sigma}_t\n", " \\right].\n", "\n", "See :ref:`Exercise 5.7 ` and\n", ":ref:`Exercise 5.9 ` for more details.\n", "\n", "(a.2)\n", "-----\n", "\n", "By :ref:`Exercise 5.10 `,\n", "\n", ".. math::\n", "\n", " \\NormDist_{\\mathbf{w}_{t + 1}}\\left[\n", " \\boldsymbol{\\mu}_p + \\boldsymbol{\\Psi} \\mathbf{w}_t,\n", " \\boldsymbol{\\Sigma}_p\n", " \\right] =\n", " \\kappa_1 \\NormDist_{\\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}' + \\boldsymbol{\\Psi}' \\mathbf{w}_{t + 1},\n", " \\boldsymbol{\\Sigma}'\n", " \\right]\n", "\n", "where\n", "\n", ".. math::\n", "\n", " \\boldsymbol{\\Sigma}'\n", " &= \\left(\n", " \\boldsymbol{\\Psi}^\\top \\boldsymbol{\\Sigma}_p^{-1} \\boldsymbol{\\Psi}\n", " \\right)^{-1}\n", " \\\\\\\\\n", " \\boldsymbol{\\Psi}'\n", " &= \\boldsymbol{\\Sigma}' \\boldsymbol{\\Psi}^\\top \\boldsymbol{\\Sigma}_p^{-1}\n", " \\\\\\\\\n", " \\boldsymbol{\\mu}'\n", " &= -\\boldsymbol{\\Sigma}' \\boldsymbol{\\Psi}^\\top \\boldsymbol{\\Sigma}_p^{-1}\n", " \\boldsymbol{\\mu}_p\n", " \\\\\\\\\n", " \\kappa_1\n", " &= \\frac{\n", " \\left\\vert \\boldsymbol{\\Sigma}' \\right\\vert^{1 / 2}\n", " }{\n", " \\left\\vert \\boldsymbol{\\Sigma}_p \\right\\vert^{1 / 2}\n", " }\n", " \\exp\\left[\n", " -0.5\n", " (\\mathbf{w}_{t + 1} - \\boldsymbol{\\mu}_p)^\\top\n", " \\left(\n", " \\boldsymbol{\\Sigma}_p^{-1} -\n", " \\boldsymbol{\\Sigma}_p^{-1} \\boldsymbol{\\Psi} \\boldsymbol{\\Sigma}'\n", " \\boldsymbol{\\Psi}^\\top \\boldsymbol{\\Sigma}_p^{-1}\n", " \\right)\n", " (\\mathbf{w}_{t + 1} - \\boldsymbol{\\mu}_p)\n", " \\right].\n", "\n", "(b)\n", "---\n", "\n", 
"The generative equations for the distributions from\n", "\n", ".. math::\n", "\n", " Pr(\\mathbf{w}_t \\mid \\mathbf{x}_{1 \\ldots T}) =\n", " \\int Pr(\\mathbf{w}_t \\mid \\mathbf{w}_{t + 1}, \\mathbf{x}_{1 \\ldots t})\n", " Pr(\\mathbf{w}_{t + 1} \\mid \\mathbf{x}_{1 \\ldots T}) d\\mathbf{w}_{t + 1}\n", "\n", "are\n", "\n", ".. math::\n", "\n", " \\mathbf{w}_t\n", " &= \\boldsymbol{\\mu}'_{t + 1} + \\boldsymbol{\\epsilon}_{t + 1}\\\\\n", " &= \\boldsymbol{\\Sigma}'_{t + 1} \\boldsymbol{\\Psi}^\\top\n", " \\boldsymbol{\\Sigma}_p^{-1} \\mathbf{w}_{t + 1} -\n", " \\boldsymbol{\\Sigma}'_{t + 1} \\boldsymbol{\\Psi}^\\top\n", " \\boldsymbol{\\Sigma}_p^{-1} \\boldsymbol{\\mu}_p +\n", " \\boldsymbol{\\Sigma}'_{t + 1} \\boldsymbol{\\Sigma}_t^{-1} \\boldsymbol{\\mu}_t +\n", " \\boldsymbol{\\epsilon}_{t + 1}\n", " & \\quad & \\text{(a.1)}\\\\\n", " &= \\boldsymbol{\\Sigma}'_{t + 1} \\boldsymbol{\\Psi}^\\top\n", " \\boldsymbol{\\Sigma}_p^{-1} \\mathbf{w}_{t + 1} -\n", " \\boldsymbol{\\Sigma}'_{t + 1} \\boldsymbol{\\Psi}^\\top\n", " \\boldsymbol{\\Sigma}_p^{-1} \\boldsymbol{\\mu}_p +\n", " \\left(\n", " \\boldsymbol{\\Sigma}_t -\n", " \\boldsymbol{\\Sigma}'_{t + 1} \\boldsymbol{\\Psi}^\\top\n", " \\boldsymbol{\\Sigma}_p^{-1} \\boldsymbol{\\Psi} \\boldsymbol{\\Sigma}_t\n", " \\right) \\boldsymbol{\\Sigma}_t^{-1} \\boldsymbol{\\mu}_t +\n", " \\boldsymbol{\\epsilon}_{t + 1}\n", " & \\quad & \\text{Exercise 5.9 (a)}\\\\\n", " &= \\boldsymbol{\\mu}_t +\n", " \\boldsymbol{\\Sigma}'_{t + 1} \\boldsymbol{\\Psi}^\\top\n", " \\boldsymbol{\\Sigma}_p^{-1}\n", " \\left(\n", " \\mathbf{w}_{t + 1} - \\boldsymbol{\\mu}_p -\n", " \\boldsymbol{\\Psi} \\boldsymbol{\\mu}_t\n", " \\right) +\n", " \\boldsymbol{\\epsilon}_{t + 1}\\\\\n", " &= \\boldsymbol{\\mu}_t +\n", " \\mathbf{C}_t\n", " \\left(\n", " \\mathbf{w}_{t + 1} - \\boldsymbol{\\mu}_{+ \\mid t + 1}\n", " \\right) +\n", " \\boldsymbol{\\epsilon}_{t + 1}\n", " & \\quad & \\text{(b.1) and (19.9)}\n", "\n", "and\n", "\n", ".. 
math::\n", "\n", " \\mathbf{w}_{t + 1} =\n", " \\boldsymbol{\\mu}_{t + 1 \\mid T} + \\boldsymbol{\\epsilon}_{t + 1 \\mid T}\n", "\n", "where\n", "\n", ".. math::\n", "\n", " \\DeclareMathOperator{\\Cov}{\\mathrm{Cov}}\n", " \\DeclareMathOperator{\\E}{\\mathrm{E}}\n", " \\E[\\boldsymbol{\\epsilon}_{t + 1}]\n", " &= \\E[\\boldsymbol{\\epsilon}_{t + 1 \\mid T}]\n", " = \\boldsymbol{0}\n", " \\\\\\\\\n", " \\Cov(\\boldsymbol{\\epsilon}_{t + 1}, \\boldsymbol{\\epsilon}_{t + 1})\n", " &= \\E\\left[\n", " \\left(\n", " \\boldsymbol{\\epsilon}_{t + 1} -\n", " \\E[\\boldsymbol{\\epsilon}_{t + 1}]\n", " \\right)\n", " \\left(\n", " \\boldsymbol{\\epsilon}_{t + 1} -\n", " \\E[\\boldsymbol{\\epsilon}_{t + 1}]\n", " \\right)^\\top\n", " \\right]\n", " = \\E\\left[\n", " \\boldsymbol{\\epsilon}_{t + 1} \\boldsymbol{\\epsilon}_{t + 1}^\\top\n", " \\right] -\n", " \\E[\\boldsymbol{\\epsilon}_{t + 1}] \\E[\\boldsymbol{\\epsilon}_{t + 1}]^\\top\n", " = \\boldsymbol{\\Sigma}'_{t + 1}\n", " \\\\\\\\\n", " \\Cov\\left(\n", " \\boldsymbol{\\epsilon}_{t + 1 \\mid T},\n", " \\boldsymbol{\\epsilon}_{t + 1 \\mid T}\n", " \\right)\n", " &= \\E\\left[\n", " \\boldsymbol{\\epsilon}_{t + 1 \\mid T}\n", " \\boldsymbol{\\epsilon}_{t + 1 \\mid T}^\\top\n", " \\right] -\n", " \\E[\\boldsymbol{\\epsilon}_{t + 1 \\mid T}]\n", " \\E[\\boldsymbol{\\epsilon}_{t + 1 \\mid T}]^\\top\n", " = \\boldsymbol{\\Sigma}_{t + 1 \\mid T}\n", " \\\\\\\\\n", " \\Cov(\\boldsymbol{\\epsilon}_{t + 1}, \\boldsymbol{\\epsilon}_{t + 1 \\mid T})\n", " &= \\boldsymbol{0},\n", "\n", "which implies\n", "\n", ".. math::\n", "\n", " \\Cov(\\mathbf{w}_{t + 1}, \\boldsymbol{\\epsilon}_{t + 1}) =\n", " \\boldsymbol{0}.\n", "\n", "These assumptions result in\n", "\n", ".. 
math::\n", "\n", " Pr(\\mathbf{w}_t \\mid \\mathbf{x}_{1 \\ldots T}) =\n", " \\NormDist_{\\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}_{t \\mid T},\n", " \\boldsymbol{\\Sigma}_{t \\mid T}\n", " \\right]\n", " \\qquad \\text{(b.3), (b.4).}\n", "\n", "(b.1)\n", "-----\n", "\n", ".. math::\n", "\n", " \\mathbf{C}_t\n", " &= \\boldsymbol{\\Sigma}'_{t + 1} \\boldsymbol{\\Psi}^\\top\n", " \\boldsymbol{\\Sigma}_p^{-1}\\\\\n", " &= \\left(\n", " \\boldsymbol{\\Psi}^\\top \\boldsymbol{\\Sigma}_p^{-1} \\boldsymbol{\\Psi} +\n", " \\boldsymbol{\\Sigma}_t^{-1}\n", " \\right)^{-1}\n", " \\boldsymbol{\\Psi}^\\top\n", " \\boldsymbol{\\Sigma}_p^{-1}\\\\\n", " &= \\boldsymbol{\\Sigma}_t \\boldsymbol{\\Psi}^\\top\n", " \\left(\n", " \\boldsymbol{\\Sigma}_p +\n", " \\boldsymbol{\\Psi} \\boldsymbol{\\Sigma}_t \\boldsymbol{\\Psi}^\\top\n", " \\right)^{-1}\\\\\n", " &= \\boldsymbol{\\Sigma}_t \\boldsymbol{\\Psi}^\\top\n", " \\boldsymbol{\\Sigma}_{+ \\mid t + 1}^{-1}\n", " & \\quad & \\text{(19.9)}\n", "\n", "To simplify notations, define :math:`A = \\boldsymbol{\\Sigma}_p` and\n", ":math:`B = \\boldsymbol{\\Psi} \\boldsymbol{\\Sigma}_t \\boldsymbol{\\Psi}^\\top`.\n", "By :ref:`Exercise 5.9 (a) `,\n", ":math:`\\boldsymbol{\\Sigma}_t =\n", "\\boldsymbol{\\Sigma}'_{t + 1}\n", "\\left(\n", "\\mathbf{I} +\n", "\\boldsymbol{\\Psi}^\\top \\boldsymbol{\\Sigma}_p^{-1} \\boldsymbol{\\Psi}\n", "\\boldsymbol{\\Sigma}_t\n", "\\right)`.\n", "\n", ".. 
math::\n", "\n", " \\boldsymbol{\\Sigma}_t \\boldsymbol{\\Psi}^\\top (A + B)^{-1}\n", " &= \\boldsymbol{\\Sigma}'_{t + 1}\n", " \\left(\n", " \\mathbf{I} +\n", " \\boldsymbol{\\Psi}^\\top \\boldsymbol{\\Sigma}_p^{-1} \\boldsymbol{\\Psi}\n", " \\boldsymbol{\\Sigma}_t\n", " \\right)\n", " \\boldsymbol{\\Psi}^\\top\n", " (A + B)^{-1}\\\\\n", " &= \\boldsymbol{\\Sigma}'_{t + 1} \\boldsymbol{\\Psi}^\\top (A + B)^{-1} +\n", " \\boldsymbol{\\Sigma}'_{t + 1} \\boldsymbol{\\Psi}^\\top\n", " A^{-1} B (A + B)^{-1}\\\\\n", " &= \\boldsymbol{\\Sigma}'_{t + 1} \\boldsymbol{\\Psi}^\\top (A + B)^{-1} +\n", " \\boldsymbol{\\Sigma}'_{t + 1} \\boldsymbol{\\Psi}^\\top A^{-1} B\n", " \\left( B^{-1} - (A + B)^{-1} A B^{-1} \\right)\n", " & \\quad & \\text{Exercise 5.9 (a)}\\\\\n", " &= \\boldsymbol{\\Sigma}'_{t + 1} \\boldsymbol{\\Psi}^\\top A^{-1} +\n", " \\boldsymbol{\\Sigma}'_{t + 1} \\boldsymbol{\\Psi}^\\top (A + B)^{-1} -\n", " \\boldsymbol{\\Sigma}'_{t + 1} \\boldsymbol{\\Psi}^\\top\n", " A^{-1} B (A + B)^{-1} A B^{-1}\\\\\n", " &= \\boldsymbol{\\Sigma}'_{t + 1} \\boldsymbol{\\Psi}^\\top\n", " \\boldsymbol{\\Sigma}_p^{-1}\n", " & \\quad & \\text{(b.2)}\n", "\n", "(b.2)\n", "-----\n", "\n", ".. math::\n", "\n", " A^{-1} B (A + B)^{-1} A B^{-1}\n", " &= A^{-1} B \\left( A^{-1} - (A + B)^{-1} B A^{-1} \\right) A B^{-1}\n", " & \\quad & \\text{Exercise 5.9 (a)}\\\\\n", " &= A^{-1} B A^{-1} A B^{-1} - A^{-1} B (A + B)^{-1} B A^{-1} A B^{-1}\\\\\n", " &= A^{-1} - A^{-1} B (A + B)^{-1}\\\\\n", " A^{-1} B (A + B)^{-1} A B^{-1} (A + B)\n", " &= \\left( A^{-1} - A^{-1} B (A + B)^{-1} \\right) (A + B)\\\\\n", " &= A^{-1} (A + B) - A^{-1} B\\\\\n", " &= \\mathbf{I}\\\\\n", " A^{-1} B (A + B)^{-1} A B^{-1}\n", " &= (A + B)^{-1}\n", "\n", "(b.3)\n", "-----\n", "\n", ".. 
math::\n", "\n", " \\boldsymbol{\\mu}_{t \\mid T}\n", " &= \\E[\\mathbf{w}_t]\\\\\n", " &= \\boldsymbol{\\mu}_t +\n", " \\mathbf{C}_t\n", " \\left(\n", " \\E[\\mathbf{w}_{t + 1}] - \\boldsymbol{\\mu}_{+ \\mid t + 1}\n", " \\right) +\n", " \\E[\\boldsymbol{\\epsilon}_{t + 1}]\n", " & \\quad & \\text{(2.14), (2.15), (2.16)}\\\\\n", " &= \\boldsymbol{\\mu}_t +\n", " \\mathbf{C}_t \\left(\n", " \\boldsymbol{\\mu}_{t + 1 \\mid T} - \\boldsymbol{\\mu}_{+ \\mid t + 1}\n", " \\right)\n", "\n", "(b.4)\n", "-----\n", "\n", ".. math::\n", "\n", " \\boldsymbol{\\Sigma}_{t \\mid T}\n", " &= \\Cov(\\mathbf{w}_t, \\mathbf{w}_t)\\\\\n", " &= \\E\\left[\n", " \\left(\n", " \\mathbf{w}_t - \\E[\\mathbf{w}_t]\n", " \\right)\n", " \\left(\n", " \\mathbf{w}_t - \\E[\\mathbf{w}_t]\n", " \\right)^\\top\n", " \\right]\\\\\n", " &= \\E\\left[\n", " \\left(\n", " \\mathbf{C}_t\n", " \\left(\n", " \\mathbf{w}_{t + 1} - \\boldsymbol{\\mu}_{t + 1 \\mid T}\n", " \\right) +\n", " \\boldsymbol{\\epsilon}_{t + 1}\n", " \\right)\n", " \\left(\n", " \\mathbf{C}_t\n", " \\left(\n", " \\mathbf{w}_{t + 1} - \\boldsymbol{\\mu}_{t + 1 \\mid T}\n", " \\right) +\n", " \\boldsymbol{\\epsilon}_{t + 1}\n", " \\right)^\\top\n", " \\right]\\\\\n", " &= \\mathbf{C}_t \\E\\left[\n", " \\left(\n", " \\mathbf{w}_{t + 1} - \\boldsymbol{\\mu}_{t + 1 \\mid T}\n", " \\right)\n", " \\left(\n", " \\mathbf{w}_{t + 1} - \\boldsymbol{\\mu}_{t + 1 \\mid T}\n", " \\right)^\\top\n", " \\right] \\mathbf{C}_t^\\top +\n", " \\mathbf{C}_t\n", " \\E\\left[\n", " \\left(\n", " \\mathbf{w}_{t + 1} - \\boldsymbol{\\mu}_{t + 1 \\mid T}\n", " \\right) \\boldsymbol{\\epsilon}_{t + 1}^\\top\n", " \\right] +\n", " \\E\\left[\n", " \\boldsymbol{\\epsilon}_{t + 1}\n", " \\left(\n", " \\mathbf{w}_{t + 1} - \\boldsymbol{\\mu}_{t + 1 \\mid T}\n", " \\right)^\\top\n", " \\right] \\mathbf{C}_t^\\top +\n", " \\E\\left[\n", " \\boldsymbol{\\epsilon}_{t + 1} \\boldsymbol{\\epsilon}_{t + 1}^\\top\n", " \\right]\\\\\n", " &= \\mathbf{C}_t 
\\boldsymbol{\\Sigma}_{t + 1 \\mid T} \\mathbf{C}_t^\\top +\n", " \\boldsymbol{\\Sigma}'_{t + 1}\\\\\n", " &= \\mathbf{C}_t \\boldsymbol{\\Sigma}_{t + 1 \\mid T} \\mathbf{C}_t^\\top +\n", " \\left(\n", " \\boldsymbol{\\Sigma}_t -\n", " \\boldsymbol{\\Sigma}_t \\boldsymbol{\\Psi}^\\top\n", " \\left(\n", " \\boldsymbol{\\Sigma}_p +\n", " \\boldsymbol{\\Psi} \\boldsymbol{\\Sigma}_t \\boldsymbol{\\Psi}^\\top\n", " \\right)^{-1}\n", " \\boldsymbol{\\Psi} \\boldsymbol{\\Sigma}_t\n", " \\right)\n", " & \\quad & \\text{(C.61)}\\\\\n", " &= \\boldsymbol{\\Sigma}_t +\n", " \\mathbf{C}_t \\boldsymbol{\\Sigma}_{t + 1 \\mid T} \\mathbf{C}_t^\\top -\n", " \\mathbf{C}_t\n", " \\boldsymbol{\\Sigma}_{+ \\mid t + 1} \\mathbf{C}_t^\\top\n", " & \\quad & \\text{(b.1)}\\\\\n", " &= \\boldsymbol{\\Sigma}_t +\n", " \\mathbf{C}_t\n", " \\left(\n", " \\boldsymbol{\\Sigma}_{t + 1 \\mid T} -\n", " \\boldsymbol{\\Sigma}_{+ \\mid t + 1}\n", " \\right)\n", " \\mathbf{C}_t^\\top" ] }, { "cell_type": "raw", "metadata": { "raw_mimetype": "text/restructuredtext" }, "source": [ "Exercise 19.8\n", "=============\n", "\n", "The graphical model for the Kalman filter is\n", "\n", ".. math::\n", "\n", " Pr(\\{ \\mathbf{x}_n \\}_{n = 1}^N, \\{ \\mathbf{w}_n \\}_{n = 1}^N) =\n", " \\left( \\prod_{n = 1}^N Pr(\\mathbf{x}_n \\mid \\mathbf{w}_n) \\right)\n", " \\left( \\prod_{n = 2}^N Pr(\\mathbf{w}_n \\mid \\mathbf{w}_{n - 1}) \\right)\n", " Pr(\\mathbf{w}_1)\n", " \\qquad \\text{(10.19), (11.1).}\n", "\n", ":cite:`archambeau2008fsds` is good for verifying this previous result and\n", ":ref:`Exercise 19.7 <prince2012computer-ex-19.7>`. :cite:`mackey2014lgssm`\n", "could also serve to verify the results of this exercise.\n", "\n", "Note that :cite:`brushe1996forward,mahony1996hybrid` are useless; one should not\n", "even consider wasting their time to skim these papers. 
Just read the book's\n", "explanations instead.\n", "\n", "(i)\n", "---\n", "\n", "This supervised learning scenario is a fully observed Markov model\n", "(:cite:`jordan2001introduction`) i.e. the training set consists of :math:`I`\n", "matched sets of states :math:`\\{ \\mathbf{w}_{in} \\}_{i = 1, n = 1}^{I, N}` and\n", "measurements :math:`\\{ \\mathbf{x}_{in} \\}_{i = 1, n = 1}^{I, N}`.\n", "\n", "Maximum likelihood (or another technique like maximum a posteriori and the\n", "Bayesian approach) can be applied to fit the parameters\n", ":math:`\\boldsymbol{\\theta} =\n", "\\left\\{\n", "\\boldsymbol{\\mu}_0, \\boldsymbol{\\Sigma}_0,\n", "\\boldsymbol{\\mu}_p, \\boldsymbol{\\Sigma}_p, \\boldsymbol{\\Psi},\n", "\\boldsymbol{\\mu}_m, \\boldsymbol{\\Sigma}_m, \\boldsymbol{\\Phi}\n", "\\right\\}` to the data:\n", "\n", ".. math::\n", "\n", " \\hat{\\boldsymbol{\\theta}}\n", " &= \\argmax_{\\boldsymbol{\\theta}} \\prod_{i = 1}^I\n", " Pr(\\{ \\mathbf{x}_{in} \\}_{n = 1}^N,\n", " \\{ \\mathbf{w}_{in} \\}_{n = 1}^N \\mid \\boldsymbol{\\theta})\n", " & \\quad & \\text{(10.21)}\\\\\n", " &= \\argmax_{\\boldsymbol{\\theta}}\n", " \\sum_{i = 1}^I\n", " \\log Pr(\\mathbf{w}_{i1} \\mid \\boldsymbol{\\theta}) +\n", " \\sum_{n = 1}^N\n", " \\log Pr(\\mathbf{x}_{in} \\mid\n", " \\mathbf{w}_{in}, \\boldsymbol{\\theta}) +\n", " \\sum_{n = 2}^N\n", " \\log Pr(\\mathbf{w}_{in} \\mid\n", " \\mathbf{w}_{i(n - 1)}, \\boldsymbol{\\theta})\\\\\n", " &= \\argmax_{\\boldsymbol{\\theta}}\n", " \\sum_{i = 1}^I\n", " \\log \\NormDist_{\\mathbf{w}_{i1}}\\left[\n", " \\boldsymbol{\\mu}_0, \\boldsymbol{\\Sigma}_0\n", " \\right] +\n", " \\sum_{n = 1}^N\n", " \\log \\NormDist_{\\mathbf{x}_{in}}\n", " \\left[\n", " \\boldsymbol{\\mu}_m + \\boldsymbol{\\Phi} \\mathbf{w}_{in},\n", " \\boldsymbol{\\Sigma}_m\n", " \\right] +\n", " \\sum_{n = 2}^N\n", " \\log \\NormDist_{\\mathbf{w}_{in}}\n", " \\left[\n", " \\boldsymbol{\\mu}_p + \\boldsymbol{\\Psi} \\mathbf{w}_{i(n - 1)},\n", " 
\\boldsymbol{\\Sigma}_p\n", " \\right]\n", " & \\quad & \\text{(i.a), (19.6), (19.8)}\\\\\n", " &= \\argmax_{\\boldsymbol{\\theta}}\n", " -\\frac{1}{2} \\sum_{i = 1}^I\n", " D_w \\log 2 \\pi +\n", " \\log \\left\\vert \\boldsymbol{\\Sigma}_0 \\right\\vert +\n", " \\left( \\mathbf{w}_{i1} - \\boldsymbol{\\mu}_0 \\right)^\\top\n", " \\boldsymbol{\\Sigma}_0^{-1}\n", " \\left( \\mathbf{w}_{i1} - \\boldsymbol{\\mu}_0 \\right) +\\\\\n", " &\\qquad\n", " \\sum_{n = 1}^N\n", " D_m \\log 2 \\pi +\n", " \\log \\left\\vert \\boldsymbol{\\Sigma}_m \\right\\vert +\n", " \\left(\n", " \\mathbf{x}_{in} - \\boldsymbol{\\mu}_m -\n", " \\boldsymbol{\\Phi} \\mathbf{w}_{in}\n", " \\right)^\\top\n", " \\boldsymbol{\\Sigma}_m^{-1}\n", " \\left(\n", " \\mathbf{x}_{in} - \\boldsymbol{\\mu}_m -\n", " \\boldsymbol{\\Phi} \\mathbf{w}_{in}\n", " \\right) +\\\\\n", " &\\qquad\n", " \\sum_{n = 2}^N\n", " D_p \\log 2 \\pi +\n", " \\log \\left\\vert \\boldsymbol{\\Sigma}_p \\right\\vert +\n", " \\left(\n", " \\mathbf{w}_{in} - \\boldsymbol{\\mu}_p -\n", " \\boldsymbol{\\Psi} \\mathbf{w}_{i(n - 1)}\n", " \\right)^\\top\n", " \\boldsymbol{\\Sigma}_p^{-1}\n", " \\left(\n", " \\mathbf{w}_{in} - \\boldsymbol{\\mu}_p -\n", " \\boldsymbol{\\Psi} \\mathbf{w}_{i(n - 1)}\n", " \\right)\n", " & \\quad & \\text{(5.1)}\n", "\n", "(ii)\n", "----\n", "\n", "This unsupervised learning scenario treats the states\n", ":math:`\\{ \\mathbf{w}_{in} \\}_{i = 1, n = 1}^{I, N}` as hidden and only the\n", "measurements :math:`\\{ \\mathbf{x}_{in} \\}_{i = 1, n = 1}^{I, N}` are observed\n", "resulting in\n", "\n", ".. 
math::\n", "\n", " \\hat{\\boldsymbol{\\theta}}\n", " &= \\argmax_{\\boldsymbol{\\theta}}\n", " \\prod_{i = 1}^I\n", " Pr(\\{ \\mathbf{x}_{in} \\}_{n = 1}^N \\mid \\boldsymbol{\\theta})\\\\\n", " &= \\argmax_{\\boldsymbol{\\theta}}\n", " \\prod_{i = 1}^I\n", " \\int Pr(\\{ \\mathbf{x}_{in} \\}_{n = 1}^N, \\mathbf{h}_i \\mid\n", " \\boldsymbol{\\theta})\n", " d\\mathbf{h}_i\n", "\n", "where :math:`\\mathbf{h}_i = \\{ \\mathbf{w}_{in} \\}_{n = 1}^N`, which can be\n", "solved using the EM algorithm :cite:`parrabmei5100`.\n", "\n", "The E-step consists of computing the posterior distribution over the states for\n", "each time sequence\n", "\n", ".. math::\n", "\n", " q_i(\\mathbf{h}_i)\n", " &= Pr(\\mathbf{h}_i \\mid\n", " \\{ \\mathbf{x}_{in} \\}_{n = 1}^N, \\boldsymbol{\\theta})\\\\\n", " &= Pr(\\{ \\mathbf{w}_{in} \\}_{n = 1}^N \\mid\n", " \\{ \\mathbf{x}_{in} \\}_{n = 1}^N, \\boldsymbol{\\theta})\\\\\n", " &= Pr(\\mathbf{w}_{iN} \\mid\n", " \\{ \\mathbf{x}_{in} \\}_{n = 1}^N, \\boldsymbol{\\theta})\n", " \\prod_{n = 1}^{N - 1}\n", " Pr(\\mathbf{w}_{i(N - n)} \\mid \\mathbf{w}_{i(N - n + 1)},\n", " \\{ \\mathbf{x}_{im} \\}_{m = 1}^{N - n},\n", " \\boldsymbol{\\theta})\n", " & \\quad & \\text{Exercise 19.7 (a),}\n", "\n", "which can be computed using the terms that result from running the Kalman filter\n", "followed by the Kalman fixed interval smoother. See\n", ":ref:`Exercise 19.7 <prince2012computer-ex-19.7>` for more details. It is\n", "important to realize that :math:`q_i(\\mathbf{h}_i)` itself is not used directly\n", "in the M-step; the E-step's purpose is to estimate the expected value and\n", "covariance of each hidden variable\n", "\n", ".. 
math::\n", "\n", " Pr(\\mathbf{w}_t \\mid \\mathbf{w}_{t + 1}, \\mathbf{x}_{1 \\ldots t}) =\n", " \\NormDist_{\\mathbf{w}_t}\\left[\n", " \\boldsymbol{\\mu}'_{t + 1}, \\boldsymbol{\\Sigma}'_{t + 1}\n", " \\right].\n", "\n", "Since no prior knowledge can be leveraged besides assuming a Gaussian\n", "distribution, the initial parameters can be randomly initialized.\n", "\n", "In the M-step, the lower bound is maximized with respect to the parameters\n", ":math:`\\boldsymbol{\\theta} =\n", "\\left\\{\n", "\\boldsymbol{\\mu}_0, \\boldsymbol{\\Sigma}_0,\n", "\\boldsymbol{\\mu}_p, \\boldsymbol{\\Sigma}_p, \\boldsymbol{\\Psi},\n", "\\boldsymbol{\\mu}_m, \\boldsymbol{\\Sigma}_m, \\boldsymbol{\\Phi}\n", "\\right\\}` so that\n", "\n", ".. math::\n", "\n", " \\DeclareMathOperator{\\tr}{\\mathrm{tr}}\n", " \\boldsymbol{\\theta}^{[t + 1]}\n", " &= \\argmax_{\\boldsymbol{\\theta}}\n", " \\sum_{i = 1}^I\n", " \\int q_i^{[t]}(\\mathbf{h}_i)\n", " \\log Pr(\\{ \\mathbf{x}_{in} \\}_{n = 1}^N, \\mathbf{h}_i \\mid\n", " \\boldsymbol{\\theta}) d\\mathbf{h}_i\n", " & \\quad & \\text{(7.51)}\\\\\n", " &= \\argmax_{\\boldsymbol{\\theta}}\n", " \\sum_{i = 1}^I\n", " \\E\\left[\n", " \\log Pr(\\{ \\mathbf{x}_{in} \\}_{n = 1}^N,\n", " \\{ \\mathbf{w}_{in} \\}_{n = 1}^N \\mid \\boldsymbol{\\theta})\n", " \\right]\\\\\n", " &= \\argmax_{\\boldsymbol{\\theta}}\n", " -\\frac{1}{2} \\left(\n", " C + I \\log \\left\\vert \\boldsymbol{\\Sigma}_0 \\right\\vert +\n", " I N \\log \\left\\vert \\boldsymbol{\\Sigma}_m \\right\\vert +\n", " I (N - 1) \\log \\left\\vert \\boldsymbol{\\Sigma}_p \\right\\vert +\n", " \\tr\\left(\n", " \\E[Z] \\boldsymbol{\\Sigma}_0^{-1}\n", " \\right) +\n", " \\tr\\left(\n", " \\E[M] \\boldsymbol{\\Sigma}_m^{-1}\n", " \\right) +\n", " \\tr\\left(\n", " \\E[P] \\boldsymbol{\\Sigma}_p^{-1}\n", " \\right)\n", " \\right)\n", " & \\quad & \\text{(i), (ii.a), (ii.b), (ii.c).}\n", "\n", "(ii.a)\n", "------\n", "\n", ".. 
math::\n", "\n", " C = I D_w \\log 2 \\pi + I N D_m \\log 2 \\pi + I (N - 1) D_p \\log 2 \\pi\n", "\n", "and\n", "\n", ".. math::\n", "\n", " \\sum_{i = 1}^I\n", " \\left( \\mathbf{w}_{i1} - \\boldsymbol{\\mu}_0 \\right)^\\top\n", " \\boldsymbol{\\Sigma}_0^{-1}\n", " \\left( \\mathbf{w}_{i1} - \\boldsymbol{\\mu}_0 \\right)\n", " &= \\tr\\left[\n", " \\sum_{i = 1}^I\n", " \\left( \\mathbf{w}_{i1} - \\boldsymbol{\\mu}_0 \\right)\n", " \\left( \\mathbf{w}_{i1} - \\boldsymbol{\\mu}_0 \\right)^\\top\n", " \\boldsymbol{\\Sigma}_0^{-1}\n", " \\right]\n", " & \\quad & \\text{(C.14), (C.15)}\\\\\n", " &= \\tr\\left[\n", " Z \\boldsymbol{\\Sigma}_0^{-1}\n", " \\right]\n", "\n", "(ii.b)\n", "------\n", "\n", ".. math::\n", "\n", " & \\sum_{i = 1}^I \\sum_{n = 1}^N\n", " \\left(\n", " \\mathbf{x}_{in} - \\boldsymbol{\\mu}_m -\n", " \\boldsymbol{\\Phi} \\mathbf{w}_{in}\n", " \\right)^\\top\n", " \\boldsymbol{\\Sigma}_m^{-1}\n", " \\left(\n", " \\mathbf{x}_{in} - \\boldsymbol{\\mu}_m -\n", " \\boldsymbol{\\Phi} \\mathbf{w}_{in}\n", " \\right)\\\\\n", " &= \\tr\\left[\n", " \\sum_{i = 1}^I \\sum_{n = 1}^N\n", " \\left(\n", " \\mathbf{x}_{in} - \\boldsymbol{\\mu}_m -\n", " \\boldsymbol{\\Phi} \\mathbf{w}_{in}\n", " \\right)\n", " \\left(\n", " \\mathbf{x}_{in} - \\boldsymbol{\\mu}_m -\n", " \\boldsymbol{\\Phi} \\mathbf{w}_{in}\n", " \\right)^\\top\n", " \\boldsymbol{\\Sigma}_m^{-1}\n", " \\right]\n", " & \\quad & \\text{(C.14), (C.15)}\\\\\n", " &= \\tr\\left[\n", " M \\boldsymbol{\\Sigma}_m^{-1}\n", " \\right]\n", "\n", "(ii.c)\n", "------\n", "\n", ".. 
math::\n", "\n", " & \\sum_{i = 1}^I \\sum_{n = 2}^N\n", " \\left(\n", " \\mathbf{w}_{in} - \\boldsymbol{\\mu}_p -\n", " \\boldsymbol{\\Psi} \\mathbf{w}_{i(n - 1)}\n", " \\right)^\\top\n", " \\boldsymbol{\\Sigma}_p^{-1}\n", " \\left(\n", " \\mathbf{w}_{in} - \\boldsymbol{\\mu}_p -\n", " \\boldsymbol{\\Psi} \\mathbf{w}_{i(n - 1)}\n", " \\right)\\\\\n", " &= \\tr\\left[\n", " \\sum_{i = 1}^I \\sum_{n = 2}^N\n", " \\left(\n", " \\mathbf{w}_{in} - \\boldsymbol{\\mu}_p -\n", " \\boldsymbol{\\Psi} \\mathbf{w}_{i(n - 1)}\n", " \\right)\n", " \\left(\n", " \\mathbf{w}_{in} - \\boldsymbol{\\mu}_p -\n", " \\boldsymbol{\\Psi} \\mathbf{w}_{i(n - 1)}\n", " \\right)^\\top\n", " \\boldsymbol{\\Sigma}_p^{-1}\n", " \\right]\n", " & \\quad & \\text{(C.14), (C.15)}\\\\\n", " &= \\tr\\left[\n", " P \\boldsymbol{\\Sigma}_p^{-1}\n", " \\right]" ] }, { "cell_type": "raw", "metadata": { "raw_mimetype": "text/restructuredtext" }, "source": [ "Exercise 19.9\n", "=============\n", "\n", "The mean and covariance of the points are respectively\n", "\n", ".. math::\n", "\n", " \\sum_{j = 0}^{2D_\\mathbf{w}} a_j \\hat{\\mathbf{w}}^{[j]}\n", " &= a_0 \\boldsymbol{\\mu}_{t - 1} +\n", " \\sum_{j = 1}^{D_\\mathbf{w}}\n", " \\frac{1 - a_0}{2D_\\mathbf{w}}\n", " \\left(\n", " \\boldsymbol{\\mu}_{t - 1} +\n", " \\sqrt{\\frac{D_\\mathbf{w}}{1 - a_0}}\n", " \\boldsymbol{\\Sigma}_{t - 1}^{1 / 2} \\mathbf{e}_j\n", " \\right) +\\\\\n", " &\\qquad\n", " \\sum_{j = D_\\mathbf{w} + 1}^{2D_\\mathbf{w}}\n", " \\frac{1 - a_0}{2D_\\mathbf{w}}\n", " \\left(\n", " \\boldsymbol{\\mu}_{t - 1} -\n", " \\sqrt{\\frac{D_\\mathbf{w}}{1 - a_0}}\n", " \\boldsymbol{\\Sigma}_{t - 1}^{1 / 2} \\mathbf{e}_{j - D_\\mathbf{w}}\n", " \\right)\n", " & \\quad & \\text{(19.40), (19.41)}\\\\\n", " &= a_0 \\boldsymbol{\\mu}_{t - 1} +\n", " 2 D_\\mathbf{w} \\frac{1 - a_0}{2D_\\mathbf{w}} \\boldsymbol{\\mu}_{t - 1}\\\\\n", " &= \\boldsymbol{\\mu}_{t - 1}\n", "\n", "and\n", "\n", ".. 
math::\n", "\n", " \\sum_{j = 0}^{2D_\\mathbf{w}} a_j\n", " \\left( \\hat{\\mathbf{w}}^{[j]} - \\boldsymbol{\\mu}_{t - 1} \\right)\n", " \\left( \\hat{\\mathbf{w}}^{[j]} - \\boldsymbol{\\mu}_{t - 1} \\right)^\\top\n", " &= \\sum_{j = 1}^{D_\\mathbf{w}}\n", " \\frac{1 - a_0}{2D_\\mathbf{w}} \\frac{D_\\mathbf{w}}{1 - a_0}\n", " \\left(\n", " \\boldsymbol{\\Sigma}_{t - 1}^{1 / 2} \\mathbf{e}_j\n", " \\right)\n", " \\left(\n", " \\boldsymbol{\\Sigma}_{t - 1}^{1 / 2} \\mathbf{e}_j\n", " \\right)^\\top +\\\\\n", " &\\qquad\n", " \\sum_{j = D_\\mathbf{w} + 1}^{2D_\\mathbf{w}}\n", " \\frac{1 - a_0}{2D_\\mathbf{w}} \\frac{D_\\mathbf{w}}{1 - a_0}\n", " \\left(\n", " -\\boldsymbol{\\Sigma}_{t - 1}^{1 / 2} \\mathbf{e}_{j - D_\\mathbf{w}}\n", " \\right)\n", " \\left(\n", " -\\boldsymbol{\\Sigma}_{t - 1}^{1 / 2} \\mathbf{e}_{j - D_\\mathbf{w}}\n", " \\right)^\\top\n", " & \\quad & \\text{(19.40), (19.41)}\\\\\n", " &= \\sum_{j = 1}^{D_\\mathbf{w}}\n", " \\boldsymbol{\\Sigma}_{t - 1}^{1 / 2} \\mathbf{e}_j\n", " \\mathbf{e}_j^\\top {\\boldsymbol{\\Sigma}_{t - 1}^{1 / 2}}^\\top\\\\\n", " &= \\sum_{j = 1}^{D_\\mathbf{w}}\n", " \\mathbf{U} \\boldsymbol{\\Lambda}^{1/2} \\mathbf{e}_j\n", " \\mathbf{e}_j^\\top \\boldsymbol{\\Lambda}^{1/2} \\mathbf{V}^\\top\\\\\n", " &= \\sum_{j = 1}^{D_\\mathbf{w}}\n", " \\lambda_j \\mathbf{U}_{\\cdot j} \\mathbf{V}_{j \\cdot}^\\top\\\\\n", " &= \\boldsymbol{\\Sigma}_{t - 1}\n", "\n", "where the SVD of\n", "\n", ".. math::\n", "\n", " \\boldsymbol{\\Sigma}_{t - 1}\n", " &= \\mathbf{U} \\boldsymbol{\\Lambda} \\mathbf{V}^\\top\\\\\n", " &= \\sum_j \\lambda_j \\mathbf{U}_{\\cdot j} \\mathbf{V}_{j \\cdot}^\\top,\n", " \\\\\\\\\n", " \\boldsymbol{\\Sigma}_{t - 1}^{1 / 2}\n", " &= \\mathbf{U} \\boldsymbol{\\Lambda}^{1 / 2},\n", " \\\\\\\\\n", " {\\boldsymbol{\\Sigma}_{t - 1}^{1 / 2}}^\\top\n", " &= \\boldsymbol{\\Lambda}^{1 / 2} \\mathbf{V}^\\top." 
] }, { "cell_type": "raw", "metadata": { "raw_mimetype": "text/restructuredtext" }, "source": [ "Exercise 19.20\n", "==============\n", "\n", ".. math::\n", "\n", " \\mathbf{x}\n", " &= \\mathbf{g}[\\mathbf{w}, \\boldsymbol{\\epsilon}]\n", " & \\quad & \\text{(19.30)}\\\\\n", " \\begin{bmatrix} x_1\\\\ y_1\\\\ x_2\\\\ y_2 \\end{bmatrix}\n", " &= \\begin{bmatrix} u_1\\\\ v_1\\\\ u_2\\\\ v_2 \\end{bmatrix} \\frac{1}{1 + w} +\n", " \\boldsymbol{\\epsilon}\n", " & \\quad & \\text{(19.50)}\n", "\n", ":cite:`hooverece8540ekf` has a nice worked out example that makes the following\n", "more understandable.\n", "\n", ".. math::\n", "\n", " \\boldsymbol{\\Phi}\n", " &= \\frac{\n", " \\partial \\mathbf{g}[\\mathbf{w}, \\boldsymbol{\\epsilon}]\n", " }{\\partial \\mathbf{w}}\n", " & \\quad & \\text{(19.31)}\\\\\n", " &= \\frac{\n", " \\partial \\mathbf{g}[\\mathbf{w}, \\boldsymbol{\\epsilon}]\n", " }{\n", " \\partial \\left\\{ u_1, v_1, u_2, v_2, w\\right\\}\n", " }\\\\\n", " &= \\frac{1}{1 + w} \\begin{bmatrix}\n", " 1 & 0 & 0 & 0 & -\\frac{u_1}{1 + w}\\\\\n", " 0 & 1 & 0 & 0 & -\\frac{v_1}{1 + w}\\\\\n", " 0 & 0 & 1 & 0 & -\\frac{u_2}{1 + w}\\\\\n", " 0 & 0 & 0 & 1 & -\\frac{v_2}{1 + w}\n", " \\end{bmatrix}\n", " \\\\\\\\\n", " \\boldsymbol{\\Upsilon}\n", " &= \\frac{\n", " \\partial \\mathbf{g}[\\mathbf{w}, \\boldsymbol{\\epsilon}]\n", " }{\n", " \\partial \\boldsymbol{\\epsilon}\n", " }\\\\\n", " &= \\mathbf{I}\n", " & \\quad & \\text{(19.31).}" ] }, { "cell_type": "raw", "metadata": { "raw_mimetype": "text/restructuredtext" }, "source": [ ".. rubric:: References\n", "\n", ".. 
bibliography:: chapter-19.bib" ] } ], "metadata": { "anaconda-cloud": {}, "celltoolbar": "Raw Cell Format", "kernelspec": { "display_name": "Python [default]", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 0 }