%r # Generate a data set n = 200 heights = rnorm(n, 180, 5) # Compute Sensitivity: #------------------------------------------------------- f = rep(0, n) for(i in 2:n-1){ a = 1:(i-1) b = (i+1):n heights2 = c(heights[a], heights[b]) f[i] = abs(mean(heights) - mean(heights2)) } sensitivity = max(f) print(max(f)) #------------------------------------------------------- # Add noise: #------------------------------------------------------- install.packages("rmutil") install.packages("ggplot2") require(rmutil) require(ggplot2) epsilon1 = 1 epsilon2 = 0.25 mse1 = rep(0,n) mse2 = rep(0,n) for(i in 1:n){ dat = heights[1:i] out1 = mean(dat) + rlaplace(1, 0, sensitivity/epsilon1) out2 = mean(dat) + rlaplace(1, 0, sensitivity/epsilon2) mse1[i] = (180 - out1)^2 mse2[i] = (180 - out2)^2 } # Plot the results x = 1:n df = data.frame(x, mse1, mse2) p = ggplot(df, aes(x)) + # basic graphical object geom_line(aes(y=mse1), colour= "blue") + # first layer geom_line(aes(y=mse2), colour= "orange") # second layer p + labs(title = "Effect of Laplace Noise on MSE(Sample Size)") + xlab("Sample Size") + ylab("MSE") #-------------------------------------------------------
%r #l-infinity sampling based privacy mechanism, as suggested by l_inf_samp = function(X, alpha){ n = length(X[,1]) d = length(X[1,]) samples = matrix(c(rep(0, n*d)), ncol = d, nrow = n) sample = rep(0,d) for(j in 1:n){ x = X[j,] r = max(abs(x)) x_hat = rep(0,d) for(i in 1:d){ p = 0.5 + x[i]/(2*(r+(r==0))) y = rbinom(1, 1, p) x_hat[i] = (y==1)*r - (y==0)*r } A = 2^(d-1) if(d%%2 == 1){ C = (1/A)*choose(d-1, 0.5*(d-1)) } else{ C = 1/(A+0.5*choose(d, 0.5*d))*choose(d-1, 0.5*d) } B = r*((exp(alpha)+1)/(exp(alpha)-1))*(1/C) pi_alph = exp(alpha)/(exp(alpha) + 1) T_alph = rbinom(1, 1, pi_alph) accept = FALSE while(!accept){ z = runif(d, -B, B) dot = sum(z*x_hat) if((dot >= 0)&&(T_alph == 1)){ sample = z accept = TRUE } else if((dot <= 0)&&(T_alph == 0)){ sample = z accept = TRUE } } samples[j,] = sample } return(samples) } my_mean = function(X){ v_sum = rep(0,d) N = length(X[,1]) for(i in 1:N){ v_sum = v_sum + X[i,] } return(v_sum/N) } gen_data = function(N,d,prop){ X = matrix(c(rep(0,d*N)), ncol=d, nrow=N) for(i in 1:d){ count = 0 while(count < prop[i]){ idx = floor(runif(1,1,N)) if(X[idx,i] == 0){ X[idx,i] = 1 count = count + 1 } } } return(X) }
%r library(rmutil) library(ggplot2) # Set Parameters d = 27 alpha = 0.5 N_max = 1000 N_low = 100 # Initialize storage error1 = rep(0, N_max-N_low) error2 = rep(0, N_max-N_low) error3 = rep(0, N_max-N_low) low_bound = rep(0, N_max-N_low) # Generate Data proportions = c(6, 2, 4, 2, 1, 1, 2, 6, 5, 4, 1, 9, 2, 6, 2, 1, 6, 2, 8, 5, 2, 8, 7, 5, 6, 4, 2)*N_max/15 X = gen_data(N_max, d, proportions) theta_true = my_mean(X) for(i in N_low:N_max){ N = i X_n = X[1:i,] Z = l_inf_samp(X_n, alpha) W = matrix(rlaplace(N*d, 0, d/alpha), ncol = d, nrow = N) theta1 = my_mean(X_n) theta2 = my_mean(Z) theta3 = my_mean(X_n + W) error1[i-N_low+1] = max(abs(theta_true - theta1)) error2[i-N_low+1] = max(abs(theta_true - theta2)) error3[i-N_low+1] = max(abs(theta_true - theta3)) a = d*log(2*d) b = (exp(alpha)-1)^2 low_bound[i-N_low+1] = sqrt(a/(N*b)) } # Plot the results x = N_low:(N_max) df = data.frame(x, error2, error3) p = ggplot(df, aes(x)) + # basic graphical object geom_line(aes(y=error1), colour= "red") + # Non-private estimate geom_line(aes(y=error2), colour= "blue") + # Locally Private estimate geom_line(aes(y=error3), colour= "orange") + # Laplace Mechanism geom_line(aes(y=low_bound), colour= "black") # Min/Max optimal bound p + ggtitle("L-inf Error of different estimation methods as a function of sample size") + xlab("Sample Size") + ylab("l-inf Error") + labs()
SDS-2.x, Scalable Data Engineering Science
Last refresh: Never