Library probsa.probability.conditional

(* --------------------------------- ProBsa --------------------------------- *)
From probsa.probability Require Export cdf nrvar partition prob.

(* ---------------------------------- Main ---------------------------------- *)

Recall that a probability of an event A given an event B (ℙ[ A | B ]) is undefined if the probability of B is zero. Therefore, our formalization of conditional probability function expects as an argument a proof that event B has a positive probability.

The purpose of the PosProb class is to provide a way to check if the probability of an event defined by a predicate S is positive. Specifically, it asserts that the probability of the set {ω : Ω | S ω} according to measure μ is greater than zero.

This notion is defined as a typeclass to facilitate inference of the implicit argument about positive probability of an event.

Class PosProb {Ω} (μ : measure Ω) (S : pred Ω) :=
pos_prob : ℙ <μ >{[ S ]} > 0.

Global Instance posprob_xpredT :
∀ Ω {μ : measure Ω}, PosProb μ xpredT.

Conditional Measure μ ↦ μ|S

In this section, we define a "conditional measure", which is a measure restricted to a subset of Ω.

Section ConditionalMeasure.

Consider a probability space (Ω,μ)...

Context {Ω} (μ : measure Ω).

... and a subset of Ω satisfying a predicate S; let us further assume that the subset has positive probability.

Context (S : pred Ω) `{PosProb _ μ S}.

Then, conditional measure "μ restricted on S" can be defined as follows:

μ|S := λ ω ⇒ if (S ω) then μ ω / ℙ<μ>{[ S ]} else 0.

  Definition pmf_restricted :=
    fun ω:Ω ⇒ if S ω then pmf μ ω / ℙ <μ >{[ S ]} else 0.

  Lemma pmf_nonnegative : ∀ ω , pmf_restricted ω ≥ 0.

  Lemma pmf_sums_to_1 : is_series (countable_sum pmf_restricted) 1.

We denote μ restricted on S as restrict μ S.

Definition restrict : measure Ω :=
mkDistrib Ω pmf_restricted pmf_nonnegative pmf_sums_to_1.

End ConditionalMeasure.

(* -------------------------------------------------------------------------- *)

Conditional Probability and CDF

Conditional probability is defined using the notion of conditional measure.

Definition pr_cond {Ω} (μ : measure Ω) (S : pred Ω) `{PosProb _ μ S} (P : pred Ω) : R :=
  ℙ <restrict μ S >{[ P ]}.

Notation "'ℙ<' μ '>{[' A '|' B ']}'" := (pr_cond μ B A)
  (at level 10, format "ℙ< μ >{[ A | B ]}") : probability_scope.
Notation "'ℙ<' μ , ρ '>{[' A '|' B ']}'" := (@pr_cond _ μ B ρ A)
  (at level 10, only parsing, format "ℙ< μ , ρ >{[ A | B ]}") : probability_scope.

Definition cdf_cond {Ω} {μ : measure Ω} (B : pred Ω) `{PosProb _ μ B} (X : nrvar μ) (x : nat) :=
  ℙ <μ >{[ X ⟨<=⟩ x | B ]}.

Notation "'𝔽<' μ '>{[' A '|' B ']}(' x ')'" := (@cdf_cond _ μ B _ A x)
  (at level 10, format "𝔽< μ >{[ A | B ]}( x )") : probability_scope.
Notation "'𝔽<' μ , ρ '>{[' A '|' B ']}(' x ')'" := (@cdf_cond _ μ B ρ A x)
  (at level 10, only parsing, format "𝔽< μ , ρ >{[ A | B ]}( x )") : probability_scope.
Notation "'𝔽<' μ , ρ '>{[' A '|' B ']}'" := (@cdf_cond _ μ B ρ A)
  (at level 10, only parsing, format "𝔽< μ , ρ >{[ A | B ]}") : probability_scope.

(* -------------------------------------------------------------------------- *)

Equivalence to Textbook Definition

In this section, we prove that the measure-based definition (μ|S := ...) of conditional probability coincides with the axiomatic definition ℙ{[A | B]} = ℙ{[ A ∩ B ]} / ℙ{[ B ]}.

Section AxiomaticConditionalProbability.

  Context {Ω} (μ : measure Ω).
  Context (A : pred Ω) (B : pred Ω) `{PosProb _ μ B}.

  Lemma pr_cond_axiomatic :
    ℙ <μ >{[ A | B ]} = ℙ <μ >{[ A ∩ B ]} / ℙ <μ >{[ B ]}.

  Lemma pr_cond_axiomatic' :
    ℙ <μ >{[ A ∩ B ]} = ℙ <μ >{[ B ]} × ℙ <μ >{[ A | B ]}.

End AxiomaticConditionalProbability.

Basic Lemmas

In this section, we prove a few auxiliary lemmas about conditional probability.

Section BasicLemmas.

First, we prove that the probability over a restricted measure can be "folded" back to conditional probability...

  Lemma fold_prob_to_cond_prob :
    ∀ {Ω} (μ : measure Ω) (A B : pred Ω) (ρ : PosProb μ B),
      ℙ <@restrict _ μ B ρ >{[ A ]} = ℙ <μ , ρ >{[ A | B ]}.

... and show a similar folding lemma for conditional CDF.

  Lemma fold_prob_to_cond_cdf :
    ∀ {Ω} (μ : measure Ω) (B : pred Ω) (X : nrvar μ) (c : nat) `{PosProb Ω μ B},
      ℙ <restrict μ B >{[ X ⟨<=⟩ c ]} = 𝔽<μ ,_>{[ X | B ]}(c ).

We prove that the probability of an event P conditioned on an event that is always true xpredT is equal to the probability of P.

  Lemma pr_cond_xpredT :
    ∀ {Ω} (μ : measure Ω) (P : pred Ω) `{PosProb _ μ xpredT},
      ℙ <μ ,_>{[ P | xpredT ]} = ℙ <μ >{[ P ]}.

  Lemma cdf_cond_xpredT :
    ∀ {Ω} (μ : measure Ω) (P : nrvar μ) (ρ : PosProb _ _) (x : nat ),
      𝔽<μ ,ρ >{[ P | xpredT ]}(x ) = 𝔽<μ >{[ P ]}(x ).

  Lemma pr_cond_xpred1 :
    ∀ {Ω} (μ : measure Ω) (P : pred Ω) (ω0 : Ω) (ρ : PosProb _ _),
      ℙ <μ >{[ P | xpred1 ω0 ]} = I [ P ω0 ].

Notice that the value of conditional probability does not change depending on the accompanying proof. Hence, one can switch between different proofs without changing the value.

  Lemma cond_prob_posprob_irrelevance :
    ∀ {Ω} (μ : measure Ω) (A B : pred Ω) (ρ1 ρ2 : PosProb μ B),
      ℙ <μ , ρ1 >{[ A | B ]} = ℙ <μ , ρ2 >{[ A | B ]}.

We prove a similar lemma for conditional CDF.

  Lemma cond_cdf_posprob_irrelevance :
    ∀ {Ω} (μ : measure Ω) (A : nrvar μ) (B : pred Ω) (x : nat) (ρ1 ρ2 : PosProb μ B),
      𝔽<μ , ρ1 >{[ A | B ]}(x ) = 𝔽<μ , ρ2 >{[ A | B ]}(x ).

  Lemma pr_cond_eq_pred :
    ∀ {Ω} (μ : measure Ω) (P Q B : pred Ω) (ρ1 ρ2 : PosProb μ B ),
      (∀ ω, B ω → P ω ↔ Q ω ) →
      ℙ <μ , ρ1 >{[ P | B ]} = ℙ <μ , ρ2 >{[ Q | B ]}.

  Lemma pr_cond_eq_cond :
    ∀ {Ω} (μ : measure Ω) (P B C : pred Ω) (ρ1 : PosProb μ B) (ρ2 : PosProb μ C ),
      (∀ ω, B ω = C ω ) →
      ℙ <μ , ρ1 >{[ P | B ]} = ℙ <μ , ρ2 >{[ P | C ]}.

  Lemma cdf_cond_eq_cond :
    ∀ {Ω} (μ : measure Ω) (P : nrvar μ) (B C : pred Ω) (x : nat) ρ1 ρ2 ,
      (∀ ω, B ω = C ω ) →
      𝔽<μ , ρ1 >{[ P | B ]}(x ) = 𝔽<μ , ρ2 >{[ P | C ]}(x ).

This is a technical lemma that can be skipped. Intuitively, it states that if we consider only those outcomes in the product space where the first component belongs to B (i.e., product measure is restricted to λ ω ⇒ B ω.1), then the probability of A under this restricted measure is the same as the probability of A under the original product measure, but with the first measure restricted to B.

  Lemma distribution_of_prod_restrict :
    ∀ {Ω1} {Ω2} (μ1 : measure Ω1) (μ2 : measure Ω2) (B : pred Ω1) (A : pred (Ω1 × Ω2))
      {ρ12 : PosProb (distrib_prod μ1 μ2) (λ ω , B ω .1)} {ρ1 : PosProb μ1 B},
      ℙ < restrict (distrib_prod μ1 μ2) (fun ω ⇒ B ω .1) >{[ A ]}
      = ℙ < distrib_prod (restrict μ1 B) (μ2) >{[ A ]}.

  Lemma pr_joint_pred_eq :
    ∀ {Ω1} {Ω2} (μ1 : measure Ω1) (μ2 : measure Ω2) A A1 A2,
      (∀ ω, A ω = A1 ω .1 && A2 ω .2 ) →
      ℙ <distrib_prod μ1 μ2 >{[ A ]} = ℙ <μ1 >{[ A1 ]} × ℙ <μ2 >{[ A2 ]}.

  Lemma pr_cond_joint_pred_eq_cond_eq :
    ∀ {Ω1} {Ω2} (μ1 : measure Ω1) (μ2 : measure Ω2)
      A A1 A2 B B1 B2 ρ ρ1 ρ2,
      (∀ ω, A ω = A1 ω .1 && A2 ω .2 ) →
      (∀ ω, B ω = B1 ω .1 && B2 ω .2 ) →
      ℙ <distrib_prod μ1 μ2 , ρ >{[ A | B ]} = ℙ <μ1 , ρ1 >{[ A1 | B1 ]} × ℙ <μ2 , ρ2 >{[ A2 | B2 ]}.

  Lemma cdf_cond_marginal11 :
    ∀ {Ω1} {Ω2} (μ1 : measure Ω1) (μ2 : measure Ω2)
      (A : nrvar (distrib_prod μ1 μ2)) (A' : nrvar μ1) (B : pred _) ρ1 ρ2 x,
      (∀ ω, A ω = A' ω .1 ) →
      𝔽<distrib_prod μ1 μ2 , ρ1 >{[ A | fun ω ⇒ B ω .1 ]}(x ) = 𝔽<μ1 , ρ2 >{[ A' | B ]}(x ).

  Lemma cdf_cond_marginal21 :
    ∀ {Ω1} {Ω2} (μ1 : measure Ω1) (μ2 : measure Ω2)
      (A : nrvar (distrib_prod μ1 μ2)) (A' : nrvar μ2) B ρ1 ρ2 x,
      (∀ ω, A ω = A' ω .2 ) →
      𝔽<distrib_prod μ1 μ2 , ρ1 >{[ A | fun ω ⇒ B ω .1 ]}(x ) = 𝔽<μ2 , ρ2 >{[ A' | xpredT ]}(x ).

End BasicLemmas.