Library probsa.probability.conditional
(* --------------------------------- ProBsa --------------------------------- *)
From probsa.probability Require Export cdf nrvar partition prob.
(* ---------------------------------- Main ---------------------------------- *)
From probsa.probability Require Export cdf nrvar partition prob.
(* ---------------------------------- Main ---------------------------------- *)
Recall that a probability of an event A given an event B (ℙ[
A | B ]) is undefined if the probability of B is
zero. Therefore, our formalization of conditional probability
function expects as an argument a proof that event B has a
positive probability.
The purpose of the PosProb class is to provide a way to check if
the probability of an event defined by a predicate S is
positive. Specifically, it asserts that the probability of the set
{ω : Ω | S ω} according to measure μ is greater than zero.
This notion is defined as a typeclass to facilitate inference of
the implicit argument about positive probability of an event.
Class PosProb {Ω} (μ : measure Ω) (S : pred Ω) :=
pos_prob : ℙ<μ>{[ S ]} > 0.
Global Instance posprob_xpredT :
∀ Ω {μ : measure Ω}, PosProb μ xpredT.
pos_prob : ℙ<μ>{[ S ]} > 0.
Global Instance posprob_xpredT :
∀ Ω {μ : measure Ω}, PosProb μ xpredT.
Conditional Measure μ ↦ μ|S
In this section, we define a "conditional measure", which is a measure restricted to a subset of Ω.
... and a subset of Ω satisfying a predicate S; let us
further assume that the subset has positive probability.
Then, conditional measure "μ restricted on S" can be defined
as follows:
μ|S := λ ω ⇒ if (S ω) then μ ω / ℙ<μ>{[ S ]} else 0.
Definition pmf_restricted :=
fun ω:Ω ⇒ if S ω then pmf μ ω / ℙ<μ>{[ S ]} else 0.
Lemma pmf_nonnegative : ∀ ω, pmf_restricted ω ≥ 0.
Lemma pmf_sums_to_1 : is_series (countable_sum pmf_restricted) 1.
fun ω:Ω ⇒ if S ω then pmf μ ω / ℙ<μ>{[ S ]} else 0.
Lemma pmf_nonnegative : ∀ ω, pmf_restricted ω ≥ 0.
Lemma pmf_sums_to_1 : is_series (countable_sum pmf_restricted) 1.
Definition restrict : measure Ω :=
mkDistrib Ω pmf_restricted pmf_nonnegative pmf_sums_to_1.
End ConditionalMeasure.
(* -------------------------------------------------------------------------- *)
mkDistrib Ω pmf_restricted pmf_nonnegative pmf_sums_to_1.
End ConditionalMeasure.
(* -------------------------------------------------------------------------- *)
Conditional Probability and CDF
Conditional probability is defined using the notion of conditional measure.Definition pr_cond {Ω} (μ : measure Ω) (S : pred Ω) `{PosProb _ μ S} (P : pred Ω) : R :=
ℙ<restrict μ S>{[ P ]}.
Notation "'ℙ<' μ '>{[' A '|' B ']}'" := (pr_cond μ B A)
(at level 10, format "ℙ< μ >{[ A | B ]}") : probability_scope.
Notation "'ℙ<' μ , ρ '>{[' A '|' B ']}'" := (@pr_cond _ μ B ρ A)
(at level 10, only parsing, format "ℙ< μ , ρ >{[ A | B ]}") : probability_scope.
Definition cdf_cond {Ω} {μ : measure Ω} (B : pred Ω) `{PosProb _ μ B} (X : nrvar μ) (x : nat) :=
ℙ<μ>{[ X ⟨<=⟩ x | B ]}.
Notation "'𝔽<' μ '>{[' A '|' B ']}(' x ')'" := (@cdf_cond _ μ B _ A x)
(at level 10, format "𝔽< μ >{[ A | B ]}( x )") : probability_scope.
Notation "'𝔽<' μ , ρ '>{[' A '|' B ']}(' x ')'" := (@cdf_cond _ μ B ρ A x)
(at level 10, only parsing, format "𝔽< μ , ρ >{[ A | B ]}( x )") : probability_scope.
Notation "'𝔽<' μ , ρ '>{[' A '|' B ']}'" := (@cdf_cond _ μ B ρ A)
(at level 10, only parsing, format "𝔽< μ , ρ >{[ A | B ]}") : probability_scope.
(* -------------------------------------------------------------------------- *)
Equivalence to Textbook Definition
In this section, we prove that the measure-based definition (μ|S := ...) of conditional probability coincides with the axiomatic definition ℙ{[A | B]} = ℙ{[ A ∩ B ]} / ℙ{[ B ]}.
Section AxiomaticConditionalProbability.
Context {Ω} (μ : measure Ω).
Context (A : pred Ω) (B : pred Ω) `{PosProb _ μ B}.
Lemma pr_cond_axiomatic :
ℙ<μ>{[ A | B ]} = ℙ<μ>{[ A ∩ B ]} / ℙ<μ>{[ B ]}.
Lemma pr_cond_axiomatic' :
ℙ<μ>{[ A ∩ B ]} = ℙ<μ>{[ B ]} × ℙ<μ>{[ A | B ]}.
End AxiomaticConditionalProbability.
Context {Ω} (μ : measure Ω).
Context (A : pred Ω) (B : pred Ω) `{PosProb _ μ B}.
Lemma pr_cond_axiomatic :
ℙ<μ>{[ A | B ]} = ℙ<μ>{[ A ∩ B ]} / ℙ<μ>{[ B ]}.
Lemma pr_cond_axiomatic' :
ℙ<μ>{[ A ∩ B ]} = ℙ<μ>{[ B ]} × ℙ<μ>{[ A | B ]}.
End AxiomaticConditionalProbability.
First, we prove that the probability over a restricted measure
can be "folded" back to conditional probability...
Lemma fold_prob_to_cond_prob :
∀ {Ω} (μ : measure Ω) (A B : pred Ω) (ρ : PosProb μ B),
ℙ<@restrict _ μ B ρ>{[ A ]} = ℙ<μ, ρ>{[ A | B ]}.
∀ {Ω} (μ : measure Ω) (A B : pred Ω) (ρ : PosProb μ B),
ℙ<@restrict _ μ B ρ>{[ A ]} = ℙ<μ, ρ>{[ A | B ]}.
... and show a similar folding lemma for conditional CDF.
Lemma fold_prob_to_cond_cdf :
∀ {Ω} (μ : measure Ω) (B : pred Ω) (X : nrvar μ) (c : nat) `{PosProb Ω μ B},
ℙ<restrict μ B>{[ X ⟨<=⟩ c ]} = 𝔽<μ,_>{[ X | B ]}(c).
∀ {Ω} (μ : measure Ω) (B : pred Ω) (X : nrvar μ) (c : nat) `{PosProb Ω μ B},
ℙ<restrict μ B>{[ X ⟨<=⟩ c ]} = 𝔽<μ,_>{[ X | B ]}(c).
We prove that the probability of an event P conditioned on an
event that is always true xpredT is equal to the probability
of P.
Lemma pr_cond_xpredT :
∀ {Ω} (μ : measure Ω) (P : pred Ω) `{PosProb _ μ xpredT},
ℙ<μ,_>{[ P | xpredT ]} = ℙ<μ>{[ P ]}.
Lemma cdf_cond_xpredT :
∀ {Ω} (μ : measure Ω) (P : nrvar μ) (ρ : PosProb _ _) (x : nat),
𝔽<μ,ρ>{[ P | xpredT ]}(x) = 𝔽<μ>{[ P ]}(x).
Lemma pr_cond_xpred1 :
∀ {Ω} (μ : measure Ω) (P : pred Ω) (ω0 : Ω) (ρ : PosProb _ _),
ℙ<μ>{[ P | xpred1 ω0 ]} = I[ P ω0 ].
∀ {Ω} (μ : measure Ω) (P : pred Ω) `{PosProb _ μ xpredT},
ℙ<μ,_>{[ P | xpredT ]} = ℙ<μ>{[ P ]}.
Lemma cdf_cond_xpredT :
∀ {Ω} (μ : measure Ω) (P : nrvar μ) (ρ : PosProb _ _) (x : nat),
𝔽<μ,ρ>{[ P | xpredT ]}(x) = 𝔽<μ>{[ P ]}(x).
Lemma pr_cond_xpred1 :
∀ {Ω} (μ : measure Ω) (P : pred Ω) (ω0 : Ω) (ρ : PosProb _ _),
ℙ<μ>{[ P | xpred1 ω0 ]} = I[ P ω0 ].
Notice that the value of conditional probability does not
change depending on the accompanying proof. Hence, one can
switch between different proofs without changing the value.
Lemma cond_prob_posprob_irrelevance :
∀ {Ω} (μ : measure Ω) (A B : pred Ω) (ρ1 ρ2 : PosProb μ B),
ℙ<μ, ρ1>{[ A | B ]} = ℙ<μ, ρ2>{[ A | B ]}.
∀ {Ω} (μ : measure Ω) (A B : pred Ω) (ρ1 ρ2 : PosProb μ B),
ℙ<μ, ρ1>{[ A | B ]} = ℙ<μ, ρ2>{[ A | B ]}.
We prove a similar lemma for conditional CDF.
Lemma cond_cdf_posprob_irrelevance :
∀ {Ω} (μ : measure Ω) (A : nrvar μ) (B : pred Ω) (x : nat) (ρ1 ρ2 : PosProb μ B),
𝔽<μ, ρ1>{[ A | B ]}(x) = 𝔽<μ, ρ2>{[ A | B ]}(x).
Lemma pr_cond_eq_pred :
∀ {Ω} (μ : measure Ω) (P Q B : pred Ω) (ρ1 ρ2 : PosProb μ B),
(∀ ω, B ω → P ω ↔ Q ω) →
ℙ<μ, ρ1>{[ P | B ]} = ℙ<μ, ρ2>{[ Q | B ]}.
Lemma pr_cond_eq_cond :
∀ {Ω} (μ : measure Ω) (P B C : pred Ω) (ρ1 : PosProb μ B) (ρ2 : PosProb μ C),
(∀ ω, B ω = C ω) →
ℙ<μ, ρ1>{[ P | B ]} = ℙ<μ, ρ2>{[ P | C ]}.
Lemma cdf_cond_eq_cond :
∀ {Ω} (μ : measure Ω) (P : nrvar μ) (B C : pred Ω) (x : nat) ρ1 ρ2,
(∀ ω, B ω = C ω) →
𝔽<μ, ρ1>{[ P | B ]}(x) = 𝔽<μ, ρ2>{[ P | C ]}(x).
∀ {Ω} (μ : measure Ω) (A : nrvar μ) (B : pred Ω) (x : nat) (ρ1 ρ2 : PosProb μ B),
𝔽<μ, ρ1>{[ A | B ]}(x) = 𝔽<μ, ρ2>{[ A | B ]}(x).
Lemma pr_cond_eq_pred :
∀ {Ω} (μ : measure Ω) (P Q B : pred Ω) (ρ1 ρ2 : PosProb μ B),
(∀ ω, B ω → P ω ↔ Q ω) →
ℙ<μ, ρ1>{[ P | B ]} = ℙ<μ, ρ2>{[ Q | B ]}.
Lemma pr_cond_eq_cond :
∀ {Ω} (μ : measure Ω) (P B C : pred Ω) (ρ1 : PosProb μ B) (ρ2 : PosProb μ C),
(∀ ω, B ω = C ω) →
ℙ<μ, ρ1>{[ P | B ]} = ℙ<μ, ρ2>{[ P | C ]}.
Lemma cdf_cond_eq_cond :
∀ {Ω} (μ : measure Ω) (P : nrvar μ) (B C : pred Ω) (x : nat) ρ1 ρ2,
(∀ ω, B ω = C ω) →
𝔽<μ, ρ1>{[ P | B ]}(x) = 𝔽<μ, ρ2>{[ P | C ]}(x).
This is a technical lemma that can be skipped. Intuitively, it
states that if we consider only those outcomes in the product
space where the first component belongs to B (i.e., product
measure is restricted to λ ω ⇒ B ω.1), then the probability of
A under this restricted measure is the same as the probability
of A under the original product measure, but with the first
measure restricted to B.
Lemma distribution_of_prod_restrict :
∀ {Ω1} {Ω2} (μ1 : measure Ω1) (μ2 : measure Ω2) (B : pred Ω1) (A : pred (Ω1 × Ω2))
{ρ12 : PosProb (distrib_prod μ1 μ2) (λ ω, B ω.1)} {ρ1 : PosProb μ1 B},
ℙ< restrict (distrib_prod μ1 μ2) (fun ω ⇒ B ω.1) >{[ A ]}
= ℙ< distrib_prod (restrict μ1 B) (μ2) >{[ A ]}.
Lemma pr_joint_pred_eq :
∀ {Ω1} {Ω2} (μ1 : measure Ω1) (μ2 : measure Ω2) A A1 A2,
(∀ ω, A ω = A1 ω.1 && A2 ω.2) →
ℙ<distrib_prod μ1 μ2>{[ A ]} = ℙ<μ1>{[ A1 ]} × ℙ<μ2>{[ A2 ]}.
Lemma pr_cond_joint_pred_eq_cond_eq :
∀ {Ω1} {Ω2} (μ1 : measure Ω1) (μ2 : measure Ω2)
A A1 A2 B B1 B2 ρ ρ1 ρ2,
(∀ ω, A ω = A1 ω.1 && A2 ω.2) →
(∀ ω, B ω = B1 ω.1 && B2 ω.2) →
ℙ<distrib_prod μ1 μ2, ρ>{[ A | B ]} = ℙ<μ1, ρ1>{[ A1 | B1 ]} × ℙ<μ2, ρ2>{[ A2 | B2 ]}.
Lemma cdf_cond_marginal11 :
∀ {Ω1} {Ω2} (μ1 : measure Ω1) (μ2 : measure Ω2)
(A : nrvar (distrib_prod μ1 μ2)) (A' : nrvar μ1) (B : pred _) ρ1 ρ2 x,
(∀ ω, A ω = A' ω.1) →
𝔽<distrib_prod μ1 μ2, ρ1>{[ A | fun ω ⇒ B ω.1 ]}(x) = 𝔽<μ1, ρ2>{[ A' | B ]}(x).
Lemma cdf_cond_marginal21 :
∀ {Ω1} {Ω2} (μ1 : measure Ω1) (μ2 : measure Ω2)
(A : nrvar (distrib_prod μ1 μ2)) (A' : nrvar μ2) B ρ1 ρ2 x,
(∀ ω, A ω = A' ω.2) →
𝔽<distrib_prod μ1 μ2, ρ1>{[ A | fun ω ⇒ B ω.1 ]}(x) = 𝔽<μ2, ρ2>{[ A' | xpredT ]}(x).
End BasicLemmas.
∀ {Ω1} {Ω2} (μ1 : measure Ω1) (μ2 : measure Ω2) (B : pred Ω1) (A : pred (Ω1 × Ω2))
{ρ12 : PosProb (distrib_prod μ1 μ2) (λ ω, B ω.1)} {ρ1 : PosProb μ1 B},
ℙ< restrict (distrib_prod μ1 μ2) (fun ω ⇒ B ω.1) >{[ A ]}
= ℙ< distrib_prod (restrict μ1 B) (μ2) >{[ A ]}.
Lemma pr_joint_pred_eq :
∀ {Ω1} {Ω2} (μ1 : measure Ω1) (μ2 : measure Ω2) A A1 A2,
(∀ ω, A ω = A1 ω.1 && A2 ω.2) →
ℙ<distrib_prod μ1 μ2>{[ A ]} = ℙ<μ1>{[ A1 ]} × ℙ<μ2>{[ A2 ]}.
Lemma pr_cond_joint_pred_eq_cond_eq :
∀ {Ω1} {Ω2} (μ1 : measure Ω1) (μ2 : measure Ω2)
A A1 A2 B B1 B2 ρ ρ1 ρ2,
(∀ ω, A ω = A1 ω.1 && A2 ω.2) →
(∀ ω, B ω = B1 ω.1 && B2 ω.2) →
ℙ<distrib_prod μ1 μ2, ρ>{[ A | B ]} = ℙ<μ1, ρ1>{[ A1 | B1 ]} × ℙ<μ2, ρ2>{[ A2 | B2 ]}.
Lemma cdf_cond_marginal11 :
∀ {Ω1} {Ω2} (μ1 : measure Ω1) (μ2 : measure Ω2)
(A : nrvar (distrib_prod μ1 μ2)) (A' : nrvar μ1) (B : pred _) ρ1 ρ2 x,
(∀ ω, A ω = A' ω.1) →
𝔽<distrib_prod μ1 μ2, ρ1>{[ A | fun ω ⇒ B ω.1 ]}(x) = 𝔽<μ1, ρ2>{[ A' | B ]}(x).
Lemma cdf_cond_marginal21 :
∀ {Ω1} {Ω2} (μ1 : measure Ω1) (μ2 : measure Ω2)
(A : nrvar (distrib_prod μ1 μ2)) (A' : nrvar μ2) B ρ1 ρ2 x,
(∀ ω, A ω = A' ω.2) →
𝔽<distrib_prod μ1 μ2, ρ1>{[ A | fun ω ⇒ B ω.1 ]}(x) = 𝔽<μ2, ρ2>{[ A' | xpredT ]}(x).
End BasicLemmas.