{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Loglikelihood of timeseries with Exponential correlation and vector noise.\nThis example shows how to use ``tripy.chol_loglike_1D`` and how it compares to\n``scipy.stats.multivariate_normal`` for different numbers of observations\nunder the assumption of Exponential correlation.\n\nWe assume that the measurements are obtained from the following probabilistic model:\n\n\\begin{align}\\mathbf{X}_{\\mathrm{model}}(\\mathbf{\\theta}) =\n    \\mathbf{K}(\\boldsymbol{\\theta}) \\cdot f(\\mathbf{t})\n     + \\mathbf{E}_{\\mathrm{meas}},\\end{align}\n\nwhere:\n\n* $\\mathbf{X}_{\\mathrm{model}}$ is the vector of model predictions.\n* $f(\\mathbf{t})$ is a vector valued function.\n* $\\mathbf{K}(\\boldsymbol{\\theta})$ is the correlated multiplicative\n uncertainty factor.\n* $\\mathbf{E}_{\\mathrm{meas}}$ i.i.d. Gaussian random variables.\n* $\\boldsymbol{\\theta}$ is a set of parameters of the probabilistic model.\n\nThe multiplicative factor $\\mathbf{K}$ and additive factor $\\mathbf{E}$\n are distributed as\n$\\mathbf{K}(\\boldsymbol{\\theta}) \\sim \\mathcal{N} \\left[ 1.0,\n\\boldsymbol{\\Sigma}_{\\mathrm{K}}(\\boldsymbol{\\theta})\n \\right]$\nand $\\mathbf{E}_{\\mathrm{meas}} \\sim \\mathcal{N}(0,\n \\boldsymbol{\\Sigma}_{\\mathrm{E}}$,\nrespectively. This yields:\n\n\\begin{align}\\mathbf{X}_{\\mathrm{model}}(\\boldsymbol{\\theta}) \\sim \\mathcal{N}\n        \\left[ f(\\mathbf{t}), \\mathbf{\\Sigma}(\\boldsymbol{\\theta}) \\right],\\end{align}\n\nwith $\\boldsymbol{\\Sigma}(\\boldsymbol{\\theta}) = f(\\mathbf{t}) \\cdot\n\\boldsymbol{\\Sigma}_{\\mathrm{K}} + \\boldsymbol{\\Sigma}_{\\mathrm{E}}$\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Import packages:\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "from timeit import default_timer as timer\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nfrom scipy import stats\n\nfrom tripy.kernels import Exponential\nfrom tripy.loglikelihood import chol_loglike_1D"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Problem setup\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "N = 100  # Number of points\nstd_meas = np.repeat(\n    2.0, N\n)  # Standard deviation of the additive measurement uncertainty\nstd_model = np.repeat(0.5, N)  # Standard deviation of the modeling uncertainty\nl_corr = 10.0  # Correlation lengthscale\n\n# Coordinate vector\nt = np.sort(np.linspace(0, 10, N) + 0.1 * np.random.rand(N))\n\n# Vector valued function evaluation\ny_func = np.cos(t) + 5\n\n# Observations\nv_obs = np.random.rand(N) + y_func"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Reference solution using scipy:\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "e_cov_mx = np.diag(std_meas**2)\nkernel = Exponential(np.reshape(t, (-1, 1)))\ncorr_mx = kernel.eval(std_model, length_scale=l_corr)\nkph_cov_mx = np.matmul(np.diag(y_func), np.matmul(corr_mx, np.diag(y_func)))\ncov_mx = kph_cov_mx + e_cov_mx\nlogL_ref = stats.multivariate_normal.logpdf(v_obs, cov=cov_mx)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Timing comparison between conventional and efficient solutions:\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "N_iter = 10\n\n# Reference\nN_vec = np.arange(100, 1000, 100)\nt_ref_list = []\n\nfor i, N in enumerate(N_vec):\n\n    # Vectors of measurement and modeling uncertainty std. dev.\n    std_meas = np.repeat(2.0, N)\n    std_model = np.repeat(0.5, N)\n\n    # Coord vector and a test function\n    t = np.sort(np.linspace(0, 10, N) + 0.1 * np.random.rand(N))\n    y_func = np.cos(t) + 5\n\n    # Observations\n    v_obs = np.random.rand(N) + y_func\n\n    e_cov_mx = np.diag(std_meas**2)\n    kernel = Exponential(np.reshape(t, (-1, 1)))\n\n    # Reference\n    t1 = timer()\n    for _j in range(N_iter):\n        corr_mx = kernel.eval(std_model, length_scale=l_corr)\n        kph_cov_mx = np.matmul(np.diag(y_func), np.matmul(corr_mx, np.diag(y_func)))\n        cov_mx = kph_cov_mx + e_cov_mx\n        test = stats.multivariate_normal.logpdf(v_obs, cov=cov_mx)\n    t2 = timer()\n\n    t_ref_list.append((t2 - t1) / N_iter)\n\n    print(\"=============================\")\n    print(f\"Iter {i + 1} / {len(N_vec)}\")\n    print(f\"N = {N}\")\n    print(f\"t = {t_ref_list[-1]}\")\n    print(\"=============================\")\n\n# Efficient solution\np_vec = np.linspace(2, 6, 50)\nt_list = []\nfor i, p in enumerate(p_vec):\n\n    N = int(10**p)\n\n    # Vectors of measurement and modeling uncertainty std. dev.\n    std_meas = np.repeat(2.0, N)\n    std_model = np.repeat(0.5, N)\n\n    # Coord vector and a test function\n    t = np.sort(np.linspace(0, 10, N) + 0.1 * np.random.rand(N))\n    y_func = np.cos(t) + 5\n\n    # Observations\n    v_obs = np.random.rand(N) + y_func\n\n    # Initial evaluation to perform jit compilation using numba.\n    _ = chol_loglike_1D(v_obs, t, l_corr, std_model, std_meas=std_meas, y_model=y_func)\n\n    # Call function\n    t1 = timer()\n    for _j in range(N_iter):\n        test = chol_loglike_1D(\n            v_obs, t, l_corr, std_model, std_meas=std_meas, y_model=y_func\n        )\n    t2 = timer()\n\n    t_list.append((t2 - t1) / N_iter)\n\n    print(\"=============================\")\n    print(f\"Iter {i + 1} / {len(p_vec)}\")\n    print(f\"p = {p}\")\n    print(f\"t = {t_list[-1]}\")\n    print(\"=============================\")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Plot a comparison of the wall clock times:\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "fig = plt.figure()\nplt.plot([int(10**p) for p in p_vec], t_list, label=\"Efficient evaluation\")\nplt.plot(N_vec, t_ref_list, label=\"Naive evaluation\")\nplt.xlabel(\"No. of points\")\nplt.ylabel(\"Wall clock time [s]\")\nplt.xscale(\"log\")\nplt.legend(bbox_to_anchor=(0.8, 0.85), bbox_transform=fig.transFigure)\nplt.grid()\nplt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.8.6"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}