| sdcMicroObj-class {sdcMicro} | R Documentation |
"sdcMicroObj"Class to save all information about the SDC process
createSdcObj( dat, keyVars, numVars = NULL, pramVars = NULL, ghostVars = NULL, weightVar = NULL, hhId = NULL, strataVar = NULL, sensibleVar = NULL, excludeVars = NULL, options = NULL, seed = NULL, randomizeRecords = FALSE, alpha = 1 ) undolast(object) strataVar(object) <- value ## S4 replacement method for signature 'sdcMicroObj,characterOrNULL' strataVar(object) <- value
dat |
The microdata set. A numeric matrix or data frame containing the data. |
keyVars |
Indices or names of categorical key variables. They must, of course, match with the columns of ‘dat’. |
numVars |
Index or names of continuous key variables. |
pramVars |
Indices or names of categorical variables considered to be pramed. |
ghostVars |
if specified a list which each element being a list of exactly two elements.
The first element must be a character vector specifying exactly one variable name that was
also specified as a categorical key variable ( |
weightVar |
Indices or name determining the vector of sampling weights. |
hhId |
Index or name of the cluster ID (if available). |
strataVar |
Indices or names of stratification variables. |
sensibleVar |
Indices or names of sensible variables (for l-diversity) |
excludeVars |
which variables of |
options |
additional options (if specified, a list must be used as input) |
seed |
(numeric) number specifiying the seed which will be set to allow for
reproducablity. The number will be rounded and saved as element |
randomizeRecords |
(logical) if |
alpha |
numeric between 0 and 1 specifying the fraction on how much keys containing |
object |
a |
value |
|
a sdcMicroObj-class object
an object of class sdcMicroObj with modified slot @strataVar
Objects can be created by calls of the form
new("sdcMicroObj", ...).
Bernhard Meindl, Alexander Kowarik, Matthias Templ, Elias Rut
Templ, M. and Meindl, B. and Kowarik, A.: Statistical Disclosure Control for Micro-Data Using the R Package sdcMicro, Journal of Statistical Software, 67 (4), 1–36, 2015. doi: 10.18637/jss.v067.i04
showClass("sdcMicroObj")
## Not run:
data(testdata)
sdc <- createSdcObj(testdata,
keyVars=c('urbrur','roof','walls','water','electcon','relat','sex'),
numVars=c('expend','income','savings'), w='sampling_weight')
head(sdc@manipNumVars)
### Display Risks
sdc@risk$global
sdc <- dRisk(sdc)
sdc@risk$numeric
### use addNoise without Parameters
sdc <- addNoise(sdc,variables=c("expend","income"))
head(sdc@manipNumVars)
sdc@risk$numeric
### undolast
sdc <- undolast(sdc)
head(sdc@manipNumVars)
sdc@risk$numeric
### redo addNoise with Parameter
sdc <- addNoise(sdc, noise=0.2)
head(sdc@manipNumVars)
sdc@risk$numeric
### dataGen
#sdc <- undolast(sdc)
#head(sdc@risk$individual)
#sdc@risk$global
#sdc <- dataGen(sdc)
#head(sdc@risk$individual)
#sdc@risk$global
### LocalSuppression
sdc <- undolast(sdc)
head(sdc@risk$individual)
sdc@risk$global
sdc <- localSuppression(sdc)
head(sdc@risk$individual)
sdc@risk$global
### microaggregation
sdc <- undolast(sdc)
head(get.sdcMicroObj(sdc, type="manipNumVars"))
sdc <- microaggregation(sdc)
head(get.sdcMicroObj(sdc, type="manipNumVars"))
### pram
sdc <- undolast(sdc)
head(sdc@risk$individual)
sdc@risk$global
sdc <- pram(sdc,keyVar="water")
head(sdc@risk$individual)
sdc@risk$global
### rankSwap
sdc <- undolast(sdc)
head(sdc@risk$individual)
sdc@risk$global
head(get.sdcMicroObj(sdc, type="manipNumVars"))
sdc <- rankSwap(sdc)
head(get.sdcMicroObj(sdc, type="manipNumVars"))
head(sdc@risk$individual)
sdc@risk$global
### suda2
sdc <- suda2(sdc)
sdc@risk$suda2
### topBotCoding
head(get.sdcMicroObj(sdc, type="manipNumVars"))
sdc@risk$numeric
sdc <- topBotCoding(sdc, value=60000000, replacement=62000000, column="income")
head(get.sdcMicroObj(sdc, type="manipNumVars"))
sdc@risk$numeric
### LocalRecProg
data(testdata2)
keyVars <- c("urbrur", "roof", "walls", "water", "sex")
w <- "sampling_weight"
sdc <- createSdcObj(testdata2,
keyVars = keyVars,
weightVar = w)
sdc@risk$global
sdc <- LocalRecProg(sdc)
sdc@risk$global
### model-based risks
#' formula
form <- as.formula(paste("~", paste(keyVars, collapse = "+")))
sdc <- modRisk(sdc, method = "default", formulaM = form)
get.sdcMicroObj(sdc, "risk")$model
sdc <- modRisk(sdc, method = "CE", formulaM = form)
get.sdcMicroObj(sdc, "risk")$model
sdc <- modRisk(sdc, method = "PLM", formulaM = form)
get.sdcMicroObj(sdc, "risk")$model
sdc <- modRisk(sdc, method = "weightedLLM", formulaM = form)
get.sdcMicroObj(sdc, "risk")$model
sdc <- modRisk(sdc, method = "IPF", formulaM = form)
get.sdcMicroObj(sdc, "risk")$model
## End(Not run)
## we can also specify ghost (linked) variables
## these variables are linked to some categorical key variables
## and have the sampe suppression pattern as the variable that they
## are linked to after \code{\link{localSuppression}} has been applied
data(testdata)
testdata$electcon2 <- testdata$electcon
testdata$electcon3 <- testdata$electcon
testdata$water2 <- testdata$water
keyVars <- c("urbrur","roof","walls","water","electcon","relat","sex")
numVars <- c("expend","income","savings")
w <- "sampling_weight"
## we want to make sure that some variables not used as key-variables
## have the same suppression pattern as variables that have been
## selected as key variables. Thus, we are using 'ghost'-variables.
ghostVars <- list()
## we want variables 'electcon2' and 'electcon3' to be linked
## to key-variable 'electcon'
ghostVars[[1]] <- list()
ghostVars[[1]][[1]] <- "electcon"
ghostVars[[1]][[2]] <- c("electcon2","electcon3")
## Not run:
# dontrun because Examples with CPU time > 2.5 times elapsed time
## we want variable 'water2' to be linked to key-variable 'water'
ghostVars[[2]] <- list()
ghostVars[[2]][[1]] <- "water"
ghostVars[[2]][[2]] <- "water2"
## create the sdcMicroObj
obj <- createSdcObj(testdata, keyVars=keyVars,
numVars=numVars, w=w, ghostVars=ghostVars)
## apply 3-anonymity to selected key variables
obj <- kAnon(obj, k=3); obj
## check, if the suppression patterns are identical
manipGhostVars <- get.sdcMicroObj(obj, "manipGhostVars")
manipKeyVars <- get.sdcMicroObj(obj, "manipKeyVars")
all(is.na(manipKeyVars$electcon) == is.na(manipGhostVars$electcon2))
all(is.na(manipKeyVars$electcon) == is.na(manipGhostVars$electcon3))
all(is.na(manipKeyVars$water) == is.na(manipGhostVars$water2))
## exclude some variables
obj <- createSdcObj(testdata, keyVars=c("urbrur","roof","walls"), numVars="savings",
weightVar=w, excludeVars=c("relat","electcon","hhcivil","ori_hid","expend"))
colnames(get.sdcMicroObj(obj, "origData"))
## End(Not run)