import algorithms.util as util

# NOTE: This sampler depends on a state enumerator because one is already
# available in the experiments. An approximate version could be written that
# does not require the enumeration problem to have been solved first.
class ImportanceSampler(object):
	"""Importance sampler over histories drawn uniformly from an enumerator.

	Candidate histories come from ``enumerator.sample_history_uniformly()``
	and are weighted by their reach probability under ``policy`` divided by
	``enumerator.cost()``.
	"""

	def __init__(self, policy, enumerator):
		"""Store the collaborators used for sampling.

		Args:
			policy: Passed to ``get_reach_probability`` and
				``util.expected_value`` for every sampled history.
			enumerator: Object providing ``sample_history_uniformly()`` and
				``cost()``.
		"""
		self._policy = policy
		self._enumerator = enumerator
		self.name = "Importance"

	def sample(self):
		"""Draw one candidate history together with its importance weight.

		Returns:
			A ``(candidate, weight)`` tuple, where ``weight`` is the
			candidate's reach probability under the policy divided by
			``enumerator.cost()``.
		"""
		candidate = self._enumerator.sample_history_uniformly()
		# NOTE(review): presumably cost() is a constant normalizer for the
		# uniform proposal -- confirm against the enumerator. If it is
		# constant it cancels in mc_estimate's ratio, but it does scale the
		# raw weights returned by generate_samples.
		weight = candidate.get_reach_probability(self._policy) / self._enumerator.cost()
		return candidate, weight

	def generate_samples(self, num_samples, player=0):
		"""Return ``num_samples`` weighted value samples for one player.

		Args:
			num_samples: Number of histories to draw.
			player: Index into the result of ``util.expected_value``.

		Returns:
			A list of ``(value, weight)`` tuples, one per drawn history.
		"""
		# The generator is consumed lazily, so each history is evaluated
		# right after it is drawn (same call order as an explicit loop).
		draws = (self.sample() for _ in range(num_samples))
		return [
			(util.expected_value(history, self._policy)[player], weight)
			for history, weight in draws
		]

	def mc_estimate(self, num_samples, eval_every, player=0):
		"""Compute running self-normalized importance-sampling estimates.

		The estimate at a checkpoint is the weighted sum of sampled values
		divided by the sum of weights accumulated so far.

		Args:
			num_samples: The loop runs ``num_samples + 1`` times, drawing one
				history per iteration.
			eval_every: An estimate is recorded on every iteration ``i``
				with ``i % eval_every == 0`` (so the first one follows the
				very first draw). Must be non-zero.
			player: Index into the result of ``util.expected_value``;
				defaults to 0, matching ``generate_samples``.

		Returns:
			A list of estimates, one per checkpoint. A checkpoint reached
			while the accumulated weight is still zero is recorded as
			``nan``, since the ratio is undefined there.
		"""
		value_sum = 0.
		weight_sum = 0.
		estimates = []
		for i in range(num_samples + 1):
			history, weight = self.sample()
			value_sum += weight * util.expected_value(history, self._policy)[player]
			weight_sum += weight
			if i % eval_every == 0:
				if weight_sum != 0:
					estimates.append(value_sum / weight_sum)
				else:
					# Every draw so far had zero weight: keep the list
					# aligned with the checkpoints instead of raising
					# ZeroDivisionError.
					estimates.append(float("nan"))
		return estimates