Skip to content

SMC Sampling fails with multivariate distribution #312

@mathDR

Description

@mathDR

Summary:

When a sequence of thresholds having length > 1 is passed to smc, the inference fails with ValueError: In executing node '_theta_logpdf': Vector 'x' must have either the same number of entries as, or one entry fewer than, parameter vector 'a', but alpha.shape = (10,) and x.shape = (100000,)..

Description:

Using a simulator of a multinomial model, having a dirichlet prior, attempting to run smc for a sequence of thresholds produces an error.

Reproducible Steps:

import numpy as np
import scipy.stats as ss
import elfi
num_categories = 10
num_experiments = 100
theta = elfi.Prior('dirichlet', np.ones(num_categories), name='theta')
def simulator(num_experiments, theta, batch_size=1, random_state=None):
    if len(theta.shape)==1:
        theta = theta.reshape(1,-1)
    return_value = []

    for i in range(theta.shape[0]):
        return_value.append(
            ss.multinomial.rvs(
                100, 
                theta[i,:], 
                random_state=random_state
            )
        )
    return np.array(return_value)
                       
def mean(y):
    return np.mean(y, axis=1)

def ret_identity(y):
    return y

def distance_in_variation(p1,observed=[]):
    arr_p1 = np.asarray(p1)
    y = np.tile(observed[0],(np.asarray(p1).shape[0],1))
    
    return 0.5*np.sum(np.abs(arr_p1-y),axis=1)

theta0 = np.random.dirichlet(np.ones(num_categories))
np.random.seed(20191126) 
y0 = simulator(num_experiments,theta0)

from functools import partial
# Add the simulator node and observed data to the model
sim_fn = partial(simulator,num_experiments)
sim = elfi.Simulator(sim_fn, theta, observed=y0)
d = elfi.Discrepancy(
    distance_in_variation, 
    elfi.Summary(ret_identity, sim, name='ret_identity'), 
    name='d')

smc = elfi.SMC(d, batch_size=10000)

N = 1000
schedule = [100,99]
%time result_smc = smc.sample(N, schedule)

Current Output:

When the threshold schedule is [100], everything runs. When it is changed to [100,99] it fails with the error:

Progress: |█████████████████████████-------------------------| 50.0% Complete
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/executor.py in execute(cls, G)
     69                 try:
---> 70                     G.node[node] = cls._run(op, node, G)
     71                 except Exception as exc:

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/executor.py in _run(fn, node, G)
    153 
--> 154         output_dict = {'output': fn(*args, **kwargs)}
    155         return output_dict

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/scipy/stats/_multivariate.py in logpdf(self, x, alpha)
   1448         alpha = _dirichlet_check_parameters(alpha)
-> 1449         x = _dirichlet_check_input(alpha, x)
   1450 

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/scipy/stats/_multivariate.py in _dirichlet_check_input(alpha, x)
   1242                          "parameter vector 'a', but alpha.shape = %s "
-> 1243                          "and x.shape = %s." % (alpha.shape, x.shape))
   1244 

ValueError: Vector 'x' must have either the same number of entries as, or one entry fewer than, parameter vector 'a', but alpha.shape = (10,) and x.shape = (100000,).

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<timed exec> in <module>

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/methods/parameter_inference.py in sample(self, n_samples, *args, **kwargs)
    404         bar = kwargs.pop('bar', True)
    405 
--> 406         return self.infer(n_samples, *args, bar=bar, **kwargs)
    407 
    408     def _extract_result_kwargs(self):

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/methods/parameter_inference.py in infer(self, vis, bar, *args, **kwargs)
    262 
    263         while not self.finished:
--> 264             self.iterate()
    265             if vis:
    266                 self.plot_state(interactive=True, **vis_opt)

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/methods/parameter_inference.py in iterate(self)
    299         # Submit new batches if allowed
    300         while self._allow_submit(self.batches.next_index):
--> 301             next_batch = self.prepare_new_batch(self.batches.next_index)
    302             logger.debug("Submitting batch %d" % self.batches.next_index)
    303             self.batches.submit(next_batch)

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/methods/parameter_inference.py in prepare_new_batch(self, batch_index)
    716         params = GMDistribution.rvs(*self._gm_params, size=self.batch_size,
    717                                     prior_logpdf=self._prior.logpdf,
--> 718                                     random_state=self._round_random_state)
    719 
    720         batch = arr2d_to_batch(params, self.parameter_names)

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/methods/utils.py in rvs(cls, means, cov, weights, size, prior_logpdf, random_state)
    229             # check validity of x
    230             if prior_logpdf is not None:
--> 231                 x = x[np.isfinite(prior_logpdf(x))]
    232 
    233             n_accepted1 = len(x)

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/methods/utils.py in logpdf(self, x)
    356     def logpdf(self, x):
    357         """Return the log density of the joint prior at x."""
--> 358         return self._evaluate_pdf(x, log=True)
    359 
    360     def _evaluate_pdf(self, x, log=False):

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/methods/utils.py in _evaluate_pdf(self, x, log)
    380             loaded_net.node[k] = {'output': v}
    381 
--> 382         val = self.client.compute(loaded_net)[node]
    383         if ndim == 0 or (ndim == 1 and self.dim > 1):
    384             val = val[0]

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/client.py in compute(self, loaded_net)
    271     def compute(self, loaded_net):
    272         """Request evaluation of `loaded_net` and wait for result."""
--> 273         return self.apply_sync(Executor.execute, loaded_net)
    274 
    275     @property

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/clients/native.py in apply_sync(self, kallable, *args, **kwargs)
     51 
     52         """
---> 53         return kallable(*args, **kwargs)
     54 
     55     def get_result(self, task_id):

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/executor.py in execute(cls, G)
     71                 except Exception as exc:
     72                     raise exc.__class__("In executing node '{}': {}."
---> 73                                         .format(node, exc)).with_traceback(exc.__traceback__)
     74 
     75             elif 'output' not in attr:

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/executor.py in execute(cls, G)
     68                 op = attr['operation']
     69                 try:
---> 70                     G.node[node] = cls._run(op, node, G)
     71                 except Exception as exc:
     72                     raise exc.__class__("In executing node '{}': {}."

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/executor.py in _run(fn, node, G)
    152         args = [a[1] for a in sorted(args, key=itemgetter(0))]
    153 
--> 154         output_dict = {'output': fn(*args, **kwargs)}
    155         return output_dict
    156 

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/scipy/stats/_multivariate.py in logpdf(self, x, alpha)
   1447         """
   1448         alpha = _dirichlet_check_parameters(alpha)
-> 1449         x = _dirichlet_check_input(alpha, x)
   1450 
   1451         out = self._logpdf(x, alpha)

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/scipy/stats/_multivariate.py in _dirichlet_check_input(alpha, x)
   1241                          "of entries as, or one entry fewer than, "
   1242                          "parameter vector 'a', but alpha.shape = %s "
-> 1243                          "and x.shape = %s." % (alpha.shape, x.shape))
   1244 
   1245     if x.shape[0] != alpha.shape[0]:

ValueError: In executing node '_theta_logpdf': Vector 'x' must have either the same number of entries as, or one entry fewer than, parameter vector 'a', but alpha.shape = (10,) and x.shape = (100000,)..

Expected Output:

I expect the output to have samples similar to running with threshold [99].

ELFI Version:

0.7.4

Python Version:

3.7.4

Operating System:

MAC OSX

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions