from abc import ABC, abstractmethod
from collections.abc import Sequence
from dp_accounting import (
DpEvent,
DpEventBuilder,
GaussianDpEvent,
PoissonSampledDpEvent,
SampledWithoutReplacementDpEvent,
SelfComposedDpEvent,
)
from dp_accounting.rdp.rdp_privacy_accountant import NeighborRel, RdpAccountant
class SamplingStrategy(ABC):
    """Abstract base for the sampling schemes used when accounting for DP guarantees.

    Each concrete strategy knows which dataset neighboring relation its privacy
    analysis is performed under, and how to wrap a noise event in the
    corresponding sampled ``DpEvent``.
    """

    def __init__(self, neighbor_relation: NeighborRel) -> None:
        """
        Args:
            neighbor_relation (NeighborRel): Dataset neighboring relation under which
                the privacy guarantees of this sampling scheme are analyzed.
        """
        self.neighbor_relation = neighbor_relation

    @abstractmethod
    def get_dp_event(self, noise_event: DpEvent) -> DpEvent:
        """Wrap ``noise_event`` in this strategy's sampling event.

        Args:
            noise_event (DpEvent): The event describing the noise mechanism applied.

        Returns:
            DpEvent: The composed sampling + noise event.
        """
        raise NotImplementedError
class PoissonSampling(SamplingStrategy):
    """Poisson sampling: each record is included independently with a fixed probability."""

    def __init__(self, sampling_ratio: float) -> None:
        """
        Args:
            sampling_ratio (float): Probability with which each record is independently
                selected for a given update.
        """
        self.sampling_ratio = sampling_ratio
        # Poisson subsampling is analyzed under the add-or-remove-one relation.
        super().__init__(NeighborRel.ADD_OR_REMOVE_ONE)

    def get_dp_event(self, noise_event: DpEvent) -> DpEvent:
        """Return the Poisson-sampled wrapping of ``noise_event``."""
        return PoissonSampledDpEvent(self.sampling_ratio, noise_event)
class FixedSamplingWithoutReplacement(SamplingStrategy):
    """Fixed-size sampling without replacement from a known population."""

    def __init__(self, population_size: int, sample_size: int) -> None:
        """
        Args:
            population_size (int): Total number of records available to sample from.
            sample_size (int): Number of records drawn (without replacement) per update.
        """
        self.population_size = population_size
        self.sample_size = sample_size
        # Fixed-size subsampling is analyzed under the replace-one relation.
        super().__init__(NeighborRel.REPLACE_ONE)

    def get_dp_event(self, noise_event: DpEvent) -> DpEvent:
        """Return the sampled-without-replacement wrapping of ``noise_event``."""
        return SampledWithoutReplacementDpEvent(self.population_size, self.sample_size, noise_event)
class MomentsAccountant:
    """Estimates (epsilon, delta)-DP guarantees for subsampled Gaussian mechanisms
    via Renyi DP accounting (the "moments accountant" of Abadi et al. 2016)."""

    def __init__(self, moment_orders: list[float] | None = None) -> None:
        """
        Moment orders are equivalent to lambda from Deep Learning with Differential Privacy (Abadi et. al. 2016).
        They form the set of moments to estimate the infimum of Theorem 2 part 2. The default values were taken from
        the tensorflow federated DP tutorial notebook:
        https://github.com/tensorflow/federated/blob/main/docs/tutorials/federated_learning_with_differential_privacy.ipynb
        In the paper above, they state that trying lambda <= 32 is usually sufficient.
        Sampling type is the data point sampling strategy: i.e. examples from dataset for a batch with probability q
        Noise type specifies whether Gaussian or Laplacian noise is added to the updates
        Args:
            moment_orders (list[float] | None, optional): See description above. Defaults to None.
        """  # noqa
        if moment_orders is not None:
            self.moment_orders = moment_orders
        else:
            low_orders = [1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 3.0, 3.5, 4.0, 4.5]
            # Converted to float so the list matches its declared element type.
            medium_orders = [float(order) for order in range(5, 64)]
            high_orders = [128.0, 256.0, 512.0]
            self.moment_orders = low_orders + medium_orders + high_orders

    def _construct_dp_events(
        self, sampling_strategy: SamplingStrategy, noise_multiplier: float, updates: int
    ) -> DpEvent:
        """Build the event for ``updates`` applications of one sampled Gaussian mechanism."""
        # Type of noise used for DP on gradient updates
        noise_event = GaussianDpEvent(noise_multiplier)
        # Type of strategy used to select datapoints (or clients in user-level FL DP)
        sampling_event = sampling_strategy.get_dp_event(noise_event)
        # Number of times the above two procedures are performed (e.g epochs*batches for DP-SGD)
        return SelfComposedDpEvent(sampling_event, updates)

    def _construct_dp_events_trajectory(
        self,
        sampling_strategies: Sequence[SamplingStrategy],
        noise_multipliers: list[float],
        updates_list: list[int],
    ) -> DpEvent:
        """Compose a sequence of (strategy, noise, updates) phases into one event.

        Given lists of parameters this assumes the DP operations were performed in sequence.
        """
        # Guard against silent zip truncation: mismatched lengths would otherwise drop
        # trailing phases and report an overly optimistic privacy guarantee.
        if not (len(sampling_strategies) == len(noise_multipliers) == len(updates_list)):
            raise ValueError(
                "sampling_strategies, noise_multipliers and updates must have equal lengths: "
                f"got {len(sampling_strategies)}, {len(noise_multipliers)}, {len(updates_list)}"
            )
        event_builder = DpEventBuilder()
        for sampling_strategy, noise_multiplier, updates in zip(sampling_strategies, noise_multipliers, updates_list):
            event_builder.compose(self._construct_dp_events(sampling_strategy, noise_multiplier, updates), 1)
        return event_builder.build()

    def _construct_rdp_accountant(
        self,
        sampling_strategies: SamplingStrategy | Sequence[SamplingStrategy],
        noise_multipliers: float | list[float],
        updates: int | list[int],
    ) -> RdpAccountant:
        """Normalize scalar inputs to one-element lists and build a composed RDP accountant."""
        if isinstance(sampling_strategies, SamplingStrategy):
            sampling_strategies = [sampling_strategies]
        # Accept ints as well as floats (e.g. noise_multiplier=1) — an int would
        # previously slip through unwrapped and fail downstream.
        if isinstance(noise_multipliers, (int, float)):
            noise_multipliers = [float(noise_multipliers)]
        if isinstance(updates, int):
            updates = [updates]
        # The accountant is parameterized by a single neighboring relation, so all
        # strategies in the trajectory must agree (see the NOTE in get_epsilon/get_delta).
        neighbor_relation = sampling_strategies[0].neighbor_relation
        if any(strategy.neighbor_relation != neighbor_relation for strategy in sampling_strategies):
            raise ValueError("All sampling strategies must share the same neighboring relation.")
        # First we construct the DP events for the accountant to accumulate
        dp_events = self._construct_dp_events_trajectory(sampling_strategies, noise_multipliers, updates)
        # Setup accountant with set of moments to minimize over
        rdp_accountant = RdpAccountant(self.moment_orders, neighbor_relation)
        # Set accountant internal state about applied events
        rdp_accountant.compose(dp_events)
        return rdp_accountant

    def _validate_accountant_input(
        self,
        sampling_strategies: SamplingStrategy | Sequence[SamplingStrategy],
        noise_multiplier: float | list[float],
        updates: int | list[int],
    ) -> None:
        """Ensure the three inputs are either all sequences or all scalars.

        Raises:
            ValueError: If the inputs mix scalar and sequence forms.
        """
        all_lists = (
            isinstance(sampling_strategies, Sequence)
            and isinstance(noise_multiplier, list)
            and isinstance(updates, list)
        )
        all_values = (
            isinstance(sampling_strategies, SamplingStrategy)
            and isinstance(noise_multiplier, (int, float))
            and isinstance(updates, int)
        )
        # Raise rather than assert: asserts are stripped under ``python -O`` and this
        # is genuine input validation.
        if not (all_lists or all_values):
            raise ValueError(
                "sampling_strategies, noise_multiplier and updates must either all be "
                "sequences/lists or all be single values."
            )

    def get_epsilon(
        self,
        sampling_strategies: SamplingStrategy | Sequence[SamplingStrategy],
        noise_multiplier: float | list[float],
        updates: int | list[int],
        delta: float,
    ) -> float:
        """
        If the parameters are lists, then it is assumed that the training applied the parameters in a sequence of
        updates.

        Ex.

        .. code-block:: python

            sampling_strategies = [PoissonSampling(q_1), PoissonSampling(q_2)]
            noise_multiplier = [z_1, z_2]
            updates = [t_1, t_2]

        implies that ``q_1``, ``z_1`` were applied for ``t_1`` updates, followed by ``q_2``, ``z_2`` for ``t_2``
        updates.

        Args:
            sampling_strategies (SamplingStrategy | Sequence[SamplingStrategy]): Are the type of sampling done for
                each datapoint or client in the DP procedure. This is either Poisson sampling with a sampling rate
                specified or Fixed ratio sampling with a fixed number of selections performed over a specified
                population size. For non-FL DP-SGD: This is the ratio of batch size to dataset size (``L/N``, from Deep
                Learning with Differential Privacy). For FL with client-side DP-SGD (no noise on server side, instance
                level privacy): This is the ratio of client sampling probability to client data point probability
                ``q*(b_k/n_k)`` For FL with client privacy: This is the sampling of clients from the client population
                **NOTE:** If a sequence of strategies is given, they must be all of the same kind (that is poisson or
                subset, but may have different parameters)
            noise_multiplier (float | list[float]): Ratio of the noise standard deviation to clipping bound (sigma in
                Deep Learning with Differential Privacy, ``z`` in some other implementations).
            updates (int | list[int]): This is the number of noise applications to the update weights. For non-FL
                DP-SGD: This is the number of updates run (``epochs*batches`` per epoch) For FL w/ client-side DP-SGD
                (instance DP): This is the number of batches run per client (if selected every time),
                ``server_updates*epochs per server update*batches per epoch``. For FL with client privacy: Number of
                server updates.
            delta (float): This is the delta in (epsilon, delta)-Privacy, that we require.

        Returns:
            float: The corresponding epsilon
        """
        self._validate_accountant_input(sampling_strategies, noise_multiplier, updates)
        rdp_accountant = self._construct_rdp_accountant(sampling_strategies, noise_multiplier, updates)
        # calculate minimum epsilon for fixed delta
        return rdp_accountant.get_epsilon(delta)

    def get_delta(
        self,
        sampling_strategies: SamplingStrategy | Sequence[SamplingStrategy],
        noise_multiplier: float | list[float],
        updates: int | list[int],
        epsilon: float,
    ) -> float:
        """
        If the parameters are lists, then it is assumed that the training applied the parameters in a sequence of
        updates.

        Ex.

        .. code-block:: python

            sampling_strategies = [PoissonSampling(q_1), PoissonSampling(q_2)]
            noise_multiplier = [z_1, z_2]
            updates = [t_1, t_2]

        implies that ``q_1``, ``z_1`` were applied for ``t_1`` updates, followed by ``q_2``, ``z_2`` for ``t_2``
        updates.

        Args:
            sampling_strategies (SamplingStrategy | Sequence[SamplingStrategy]): Are the type of sampling done for
                each datapoint or client in the DP procedure. This is either Poisson sampling with a sampling rate
                specified or Fixed ratio sampling with a fixed number of selections performed over a specified
                population size. For non-FL DP-SGD: This is the ratio of batch size to dataset size (``L/N``, from Deep
                Learning with Differential Privacy). For FL with client-side DP-SGD (no noise on server side,
                instance level privacy): This is the ratio of client sampling probability to client data point
                probability ``q*(b_k/n_k)`` For FL with client privacy: This is the sampling of clients from the client
                population.
                **NOTE:** If a sequence of strategies is given, they must be all of the same kind (that is poisson or
                subset, but may have different parameters)
            noise_multiplier (float | list[float]): Ratio of the noise standard deviation to clipping bound (sigma in
                Deep Learning with Differential Privacy, z in some other implementations).
            updates (int | list[int]): This is the number of noise applications to the update weights. For non-FL
                DP-SGD: This is the number of updates run (``epochs*batches per epoch``) For FL w/ client-side DP-SGD
                (instance DP): This is the number of batches run per client (if selected every time),
                ``server_updates*epochs per server update*batches per epoch`` For FL with client privacy: Number of
                server updates
            epsilon (float): This is the epsilon in (epsilon, delta)-Privacy, that we require.

        Returns:
            float: delta for the provided epsilon
        """
        self._validate_accountant_input(sampling_strategies, noise_multiplier, updates)
        rdp_accountant = self._construct_rdp_accountant(sampling_strategies, noise_multiplier, updates)
        # calculate minimum delta for fixed epsilon
        return rdp_accountant.get_delta(epsilon)