#! /usr/bin/env python3

# Another "hello world" level example of using the SciPy
# optimize.minimize() optimizer, this time on a multinomial,
# where the parameters are probabilities.
#
# Usage:
#   ./optimize_multinomial.py

import scipy.optimize as optimize
import scipy.stats    as stats
import numpy          as np
import sys

# Generate a data set: counts of simulated die rolls, for a loaded die
# of unknown and randomly sampled parameters.
#    p[0..5] are the hidden parameters
#    c[0..5] are the observed counts
#
# The example then uses numerical maximum likelihood estimation to
# estimate the <p>. Note that numerical optimization is unnecessary:
# the observed frequencies (normalized counts) are the ML estimates
# for <p>. The idea is to give a small concrete example of using the
# SciPy numerical optimizer to optimize probability parameters, where
# probabilities are constrained to sum to one. 
#
# --- Simulated data ---------------------------------------------------
# Draw a hidden probability vector for a loaded K-sided die from a
# uniform Dirichlet, then roll it N times and record the counts.
K = 6                                         # number of faces on the die
N = 100                                       # total number of rolls
p_true = np.random.dirichlet(np.full(K, 1.0)) # hidden face probabilities (uniform Dirichlet draw)
c      = np.random.multinomial(N, p_true)     # observed count for each face; sums to N

# The trick is to use a change of variables such that our
# probabilities <p> are in terms of unconstrained real-valued
# values \theta:
#     p_i = \frac {        e^{\theta_i} }
#                 { \sum_j e^{\theta_j} }
#
# We set up the multinomial negative loglikelihood function -log
# P(data | theta) in terms of real-valued theta; we do a c.o.v. to
# probability parameters in order to calculate the NLL.
# 
def nll(theta, c):
    """Negative log likelihood of counts <c> under a multinomial whose
    probabilities are softmax(theta).

    theta : array-like of K unconstrained reals; outcome i has probability
            p_i = exp(theta_i) / sum_j exp(theta_j).
    c     : array-like of K observed counts.

    Returns -log P(c | softmax(theta)), up to the constant multinomial
    coefficient (which does not depend on theta and so does not affect
    the optimization).
    """
    theta = np.asarray(theta, dtype=float)
    # Stable log-softmax: subtract the max before exponentiating so that
    # exp() cannot overflow for the large theta values an unconstrained
    # optimizer may visit.  (Also avoids shadowing the builtin sum().)
    z = theta - np.max(theta)
    log_p = z - np.log(np.sum(np.exp(z)))
    # -sum_i c_i * log p_i, vectorized.
    return -np.dot(c, log_p)

# An arbitrary starting point: the log of a random probability vector,
# so exp(theta0) is already normalized.
theta0 = np.log(np.random.dirichlet(np.ones(K)))

# Minimize the NLL over unconstrained theta.  Extra arguments to nll()
# go through `args`, which should be an explicit tuple: the original
# `(c)` is just `c` and only worked because minimize() wraps a
# non-tuple args value itself.
result = optimize.minimize(nll, theta0, args=(c,))

if not result.success:
    sys.exit("Maximum likelihood fit failed")

# Convert fitted theta back to probabilities (softmax) and report each
# face alongside the true parameter and the observed count.
# (Named Z rather than `sum` to avoid shadowing the builtin.)
Z = np.sum(np.exp(result.x))
for i in range(K):
    print("theta({0}) = {1:.3f}; p = {2:.3f}; true = {3:.3f}; counts = {4}".format(
        i, result.x[i], np.exp(result.x[i]) / Z, p_true[i], c[i]))

