Library probsa.probability.conditional

(* --------------------------------- ProBsa --------------------------------- *)
From probsa.probability Require Export cdf nrvar partition prob.

(* ---------------------------------- Main ---------------------------------- *)
Recall that a probability of an event A given an event B ([ A | B ]) is undefined if the probability of B is zero. Therefore, our formalization of conditional probability function expects as an argument a proof that event B has a positive probability.
The purpose of the PosProb class is to provide a way to check if the probability of an event defined by a predicate S is positive. Specifically, it asserts that the probability of the set : Ω | S ω} according to measure μ is greater than zero.
This notion is defined as a typeclass to facilitate inference of the implicit argument about positive probability of an event.
Class PosProb {Ω} (μ : measure Ω) (S : pred Ω) :=
  pos_prob : <μ>{[ S ]} > 0.

Global Instance posprob_xpredT :
   Ω {μ : measure Ω}, PosProb μ xpredT.

Conditional Measure μ μ|S

In this section, we define a "conditional measure", which is a measure restricted to a subset of Ω.
Consider a probability space (Ω,μ)...
  Context {Ω} (μ : measure Ω).

... and a subset of Ω satisfying a predicate S; let us further assume that the subset has positive probability.
  Context (S : pred Ω) `{PosProb _ μ S}.

Then, conditional measure "μ restricted on S" can be defined as follows:
μ|S := λ ω if (S ω) then μ ω / <μ>{[ S ]} else 0.
  Definition pmf_restricted :=
    fun ω:Ωif S ω then pmf μ ω / <μ>{[ S ]} else 0.

  Lemma pmf_nonnegative : ω, pmf_restricted ω 0.

  Lemma pmf_sums_to_1 : is_series (countable_sum pmf_restricted) 1.

We denote μ restricted on S as restrict μ S.
  Definition restrict : measure Ω :=
    mkDistrib Ω pmf_restricted pmf_nonnegative pmf_sums_to_1.

End ConditionalMeasure.

(* -------------------------------------------------------------------------- *)

Conditional Probability and CDF

Conditional probability is defined using the notion of conditional measure.

Definition pr_cond {Ω} (μ : measure Ω) (S : pred Ω) `{PosProb _ μ S} (P : pred Ω) : R :=
  <restrict μ S>{[ P ]}.

Notation "'ℙ<' μ '>{[' A '|' B ']}'" := (pr_cond μ B A)
  (at level 10, format "ℙ< μ >{[ A | B ]}") : probability_scope.
Notation "'ℙ<' μ , ρ '>{[' A '|' B ']}'" := (@pr_cond _ μ B ρ A)
  (at level 10, only parsing, format "ℙ< μ , ρ >{[ A | B ]}") : probability_scope.

Definition cdf_cond {Ω} {μ : measure Ω} (B : pred Ω) `{PosProb _ μ B} (X : nrvar μ) (x : nat) :=
  <μ>{[ X ⟨<=⟩ x | B ]}.

Notation "'𝔽<' μ '>{[' A '|' B ']}(' x ')'" := (@cdf_cond _ μ B _ A x)
  (at level 10, format "𝔽< μ >{[ A | B ]}( x )") : probability_scope.
Notation "'𝔽<' μ , ρ '>{[' A '|' B ']}(' x ')'" := (@cdf_cond _ μ B ρ A x)
  (at level 10, only parsing, format "𝔽< μ , ρ >{[ A | B ]}( x )") : probability_scope.
Notation "'𝔽<' μ , ρ '>{[' A '|' B ']}'" := (@cdf_cond _ μ B ρ A)
  (at level 10, only parsing, format "𝔽< μ , ρ >{[ A | B ]}") : probability_scope.

(* -------------------------------------------------------------------------- *)

Equivalence to Textbook Definition

In this section, we prove that the measure-based definition (μ|S := ...) of conditional probability coincides with the axiomatic definition {[A | B]} = {[ A B ]} / {[ B ]}.

Basic Lemmas

In this section, we prove a few auxiliary lemmas about conditional probability.
Section BasicLemmas.

First, we prove that the probability over a restricted measure can be "folded" back to conditional probability...
  Lemma fold_prob_to_cond_prob :
     {Ω} (μ : measure Ω) (A B : pred Ω) (ρ : PosProb μ B),
      <@restrict _ μ B ρ>{[ A ]} = <μ, ρ>{[ A | B ]}.

... and show a similar folding lemma for conditional CDF.
  Lemma fold_prob_to_cond_cdf :
     {Ω} (μ : measure Ω) (B : pred Ω) (X : nrvar μ) (c : nat) `{PosProb Ω μ B},
      <restrict μ B>{[ X ⟨<=⟩ c ]} = 𝔽<μ,_>{[ X | B ]}(c).

We prove that the probability of an event P conditioned on an event that is always true xpredT is equal to the probability of P.
  Lemma pr_cond_xpredT :
     {Ω} (μ : measure Ω) (P : pred Ω) `{PosProb _ μ xpredT},
      <μ,_>{[ P | xpredT ]} = <μ>{[ P ]}.

  Lemma cdf_cond_xpredT :
     {Ω} (μ : measure Ω) (P : nrvar μ) (ρ : PosProb _ _) (x : nat),
      𝔽<μ,ρ>{[ P | xpredT ]}(x) = 𝔽<μ>{[ P ]}(x).

  Lemma pr_cond_xpred1 :
     {Ω} (μ : measure Ω) (P : pred Ω) (ω0 : Ω) (ρ : PosProb _ _),
      <μ>{[ P | xpred1 ω0 ]} = I[ P ω0 ].

Notice that the value of conditional probability does not change depending on the accompanying proof. Hence, one can switch between different proofs without changing the value.
  Lemma cond_prob_posprob_irrelevance :
     {Ω} (μ : measure Ω) (A B : pred Ω) (ρ1 ρ2 : PosProb μ B),
      <μ, ρ1>{[ A | B ]} = <μ, ρ2>{[ A | B ]}.

We prove a similar lemma for conditional CDF.
  Lemma cond_cdf_posprob_irrelevance :
     {Ω} (μ : measure Ω) (A : nrvar μ) (B : pred Ω) (x : nat) (ρ1 ρ2 : PosProb μ B),
      𝔽<μ, ρ1>{[ A | B ]}(x) = 𝔽<μ, ρ2>{[ A | B ]}(x).

  Lemma pr_cond_eq_pred :
     {Ω} (μ : measure Ω) (P Q B : pred Ω) (ρ1 ρ2 : PosProb μ B),
      ( ω, B ω P ω Q ω)
      <μ, ρ1>{[ P | B ]} = <μ, ρ2>{[ Q | B ]}.

  Lemma pr_cond_eq_cond :
     {Ω} (μ : measure Ω) (P B C : pred Ω) (ρ1 : PosProb μ B) (ρ2 : PosProb μ C),
      ( ω, B ω = C ω)
      <μ, ρ1>{[ P | B ]} = <μ, ρ2>{[ P | C ]}.

  Lemma cdf_cond_eq_cond :
     {Ω} (μ : measure Ω) (P : nrvar μ) (B C : pred Ω) (x : nat) ρ1 ρ2,
      ( ω, B ω = C ω)
      𝔽<μ, ρ1>{[ P | B ]}(x) = 𝔽<μ, ρ2>{[ P | C ]}(x).

This is a technical lemma that can be skipped. Intuitively, it states that if we consider only those outcomes in the product space where the first component belongs to B (i.e., product measure is restricted to λ ω B ω.1), then the probability of A under this restricted measure is the same as the probability of A under the original product measure, but with the first measure restricted to B.
  Lemma distribution_of_prod_restrict :
     {Ω1} {Ω2} (μ1 : measure Ω1) (μ2 : measure Ω2) (B : pred Ω1) (A : pred (Ω1 × Ω2))
      {ρ12 : PosProb (distrib_prod μ1 μ2) (λ ω, B ω.1)} {ρ1 : PosProb μ1 B},
      < restrict (distrib_prod μ1 μ2) (fun ωB ω.1) >{[ A ]}
      = < distrib_prod (restrict μ1 B) (μ2) >{[ A ]}.

  Lemma pr_joint_pred_eq :
     {Ω1} {Ω2} (μ1 : measure Ω1) (μ2 : measure Ω2) A A1 A2,
      ( ω, A ω = A1 ω.1 && A2 ω.2)
      <distrib_prod μ1 μ2>{[ A ]} = <μ1>{[ A1 ]} × <μ2>{[ A2 ]}.

  Lemma pr_cond_joint_pred_eq_cond_eq :
     {Ω1} {Ω2} (μ1 : measure Ω1) (μ2 : measure Ω2)
      A A1 A2 B B1 B2 ρ ρ1 ρ2,
      ( ω, A ω = A1 ω.1 && A2 ω.2)
      ( ω, B ω = B1 ω.1 && B2 ω.2)
      <distrib_prod μ1 μ2, ρ>{[ A | B ]} = <μ1, ρ1>{[ A1 | B1 ]} × <μ2, ρ2>{[ A2 | B2 ]}.

  Lemma cdf_cond_marginal11 :
     {Ω1} {Ω2} (μ1 : measure Ω1) (μ2 : measure Ω2)
      (A : nrvar (distrib_prod μ1 μ2)) (A' : nrvar μ1) (B : pred _) ρ1 ρ2 x,
      ( ω, A ω = A' ω.1)
      𝔽<distrib_prod μ1 μ2, ρ1>{[ A | fun ωB ω.1 ]}(x) = 𝔽<μ1, ρ2>{[ A' | B ]}(x).

  Lemma cdf_cond_marginal21 :
     {Ω1} {Ω2} (μ1 : measure Ω1) (μ2 : measure Ω2)
      (A : nrvar (distrib_prod μ1 μ2)) (A' : nrvar μ2) B ρ1 ρ2 x,
      ( ω, A ω = A' ω.2)
      𝔽<distrib_prod μ1 μ2, ρ1>{[ A | fun ωB ω.1 ]}(x) = 𝔽<μ2, ρ2>{[ A' | xpredT ]}(x).

End BasicLemmas.