%r
# Generate a data set
n = 200
heights = rnorm(n, 180, 5)
# Compute Sensitivity:
#-------------------------------------------------------
f = rep(0, n)
for(i in 2:n-1){
a = 1:(i-1)
b = (i+1):n
heights2 = c(heights[a], heights[b])
f[i] = abs(mean(heights) - mean(heights2))
}
sensitivity = max(f)
print(max(f))
#-------------------------------------------------------
# Add noise:
#-------------------------------------------------------
install.packages("rmutil")
install.packages("ggplot2")
require(rmutil)
require(ggplot2)
epsilon1 = 1
epsilon2 = 0.25
mse1 = rep(0,n)
mse2 = rep(0,n)
for(i in 1:n){
dat = heights[1:i]
out1 = mean(dat) + rlaplace(1, 0, sensitivity/epsilon1)
out2 = mean(dat) + rlaplace(1, 0, sensitivity/epsilon2)
mse1[i] = (180 - out1)^2
mse2[i] = (180 - out2)^2
}
# Plot the results
x = 1:n
df = data.frame(x, mse1, mse2)
p = ggplot(df, aes(x)) + # basic graphical object
geom_line(aes(y=mse1), colour= "blue") + # first layer
geom_line(aes(y=mse2), colour= "orange") # second layer
p + labs(title = "Effect of Laplace Noise on MSE(Sample Size)") + xlab("Sample Size") + ylab("MSE")
#-------------------------------------------------------
%r
#l-infinity sampling based privacy mechanism, as suggested by
l_inf_samp = function(X, alpha){
n = length(X[,1])
d = length(X[1,])
samples = matrix(c(rep(0, n*d)), ncol = d, nrow = n)
sample = rep(0,d)
for(j in 1:n){
x = X[j,]
r = max(abs(x))
x_hat = rep(0,d)
for(i in 1:d){
p = 0.5 + x[i]/(2*(r+(r==0)))
y = rbinom(1, 1, p)
x_hat[i] = (y==1)*r - (y==0)*r
}
A = 2^(d-1)
if(d%%2 == 1){
C = (1/A)*choose(d-1, 0.5*(d-1))
} else{
C = 1/(A+0.5*choose(d, 0.5*d))*choose(d-1, 0.5*d)
}
B = r*((exp(alpha)+1)/(exp(alpha)-1))*(1/C)
pi_alph = exp(alpha)/(exp(alpha) + 1)
T_alph = rbinom(1, 1, pi_alph)
accept = FALSE
while(!accept){
z = runif(d, -B, B)
dot = sum(z*x_hat)
if((dot >= 0)&&(T_alph == 1)){
sample = z
accept = TRUE
}
else if((dot <= 0)&&(T_alph == 0)){
sample = z
accept = TRUE
}
}
samples[j,] = sample
}
return(samples)
}
my_mean = function(X){
v_sum = rep(0,d)
N = length(X[,1])
for(i in 1:N){
v_sum = v_sum + X[i,]
}
return(v_sum/N)
}
gen_data = function(N,d,prop){
X = matrix(c(rep(0,d*N)), ncol=d, nrow=N)
for(i in 1:d){
count = 0
while(count < prop[i]){
idx = floor(runif(1,1,N))
if(X[idx,i] == 0){
X[idx,i] = 1
count = count + 1
}
}
}
return(X)
}
%r
library(rmutil)
library(ggplot2)
# Set Parameters
d = 27
alpha = 0.5
N_max = 1000
N_low = 100
# Initialize storage
error1 = rep(0, N_max-N_low)
error2 = rep(0, N_max-N_low)
error3 = rep(0, N_max-N_low)
low_bound = rep(0, N_max-N_low)
# Generate Data
proportions = c(6, 2, 4, 2, 1, 1, 2, 6, 5, 4, 1, 9, 2, 6, 2, 1, 6, 2, 8, 5, 2, 8, 7, 5, 6, 4, 2)*N_max/15
X = gen_data(N_max, d, proportions)
theta_true = my_mean(X)
for(i in N_low:N_max){
N = i
X_n = X[1:i,]
Z = l_inf_samp(X_n, alpha)
W = matrix(rlaplace(N*d, 0, d/alpha), ncol = d, nrow = N)
theta1 = my_mean(X_n)
theta2 = my_mean(Z)
theta3 = my_mean(X_n + W)
error1[i-N_low+1] = max(abs(theta_true - theta1))
error2[i-N_low+1] = max(abs(theta_true - theta2))
error3[i-N_low+1] = max(abs(theta_true - theta3))
a = d*log(2*d)
b = (exp(alpha)-1)^2
low_bound[i-N_low+1] = sqrt(a/(N*b))
}
# Plot the results
x = N_low:(N_max)
df = data.frame(x, error2, error3)
p = ggplot(df, aes(x)) + # basic graphical object
geom_line(aes(y=error1), colour= "red") + # Non-private estimate
geom_line(aes(y=error2), colour= "blue") + # Locally Private estimate
geom_line(aes(y=error3), colour= "orange") + # Laplace Mechanism
geom_line(aes(y=low_bound), colour= "black") # Min/Max optimal bound
p + ggtitle("L-inf Error of different estimation methods as a function of sample size") +
xlab("Sample Size") + ylab("l-inf Error") + labs()
ScaDaMaLe Course site and book