This notebook explores the impact of the following factors on the readings of the PMS5003:
RH
number of UFP
number of particles below 0.8um
number of particles above 0.8um
total particle number
source of PM
This is done only on the peak-concentration periods, using all the data points from each experiment. Aggregating the results per experiment yields data sets that are too small to be meaningful.
The target is the PNC reported by the sensors.
This is done using different feature selection algorithms:
- Boruta
- Ridge
- recursive feature selection with SVM
source("variables.r")
source("utilities.r")
#source("../../../AQ_data_preparation/utilities.r")
require(relaimpo)
require(Boruta)
require(lubridate)
# Load the three input data sets. The *_file variables and get_sensor_type()
# are not defined in this file (presumably they come from variables.r and
# utilities.r, sourced above -- confirm).
df_sensors <- get_sensor_type(readRDS(sensors_file))
df_ops <- readRDS(ops_file_transformed)
df_nanotracer <- readRDS(nanotracer_file)
# Average every numeric sensor column per timestamp, experiment metadata and
# sensor type. The result is still grouped; it is ungrouped before the joins.
df_sensors_ft <- df_sensors %>%
  ungroup() %>%
  group_by(date, variation, source, exp, sensor_type) %>%
  summarise(
    across(
      where(is.numeric),
      function(col) mean(col, na.rm = TRUE)
    )
  )
# Round the nanotracer timestamps to the nearest 10 seconds, then keep only
# the timestamp and the N.1. column that is joined to the sensor data later.
df_nanotracer[["date"]] <- round_date(df_nanotracer[["date"]], unit = "10 s")
df_nanotracer_ft <- dplyr::select(df_nanotracer, date, N.1.)
# Derive three reference counts from the OPS bins:
#   lw08um                - Bin.1..Bin.5 plus a linear share of Bin.6
#   gr08um                - the remaining share of Bin.6 plus Bin.7..Bin.17
#   total_particle_number - sum of Bin.1..Bin.17
# lw08um + gr08um equals total_particle_number (the two Bin.6 shares add to 1).
# NOTE(review): Bin.6 is split using the edges 0.721 and 0.897, but the rename
# further down labels Bin.5 as 0.721um and Bin.6 as 0.897um. If those labels
# are lower bin edges, the 0.8 um cut lies inside Bin.5, not Bin.6 -- confirm.
df_ops_ft <- df_ops %>%
mutate(lw08um = Bin.1 + Bin.2 + Bin.3 + Bin.4 + Bin.5 + Bin.6*(0.8 - 0.721)/(0.897 - 0.721),
gr08um = Bin.6*(0.897 - 0.8)/(0.897 - 0.721) + Bin.7 + Bin.8 + Bin.9 + Bin.10 + Bin.11 + Bin.12 + Bin.13 + Bin.14 + Bin.15 + Bin.16 + Bin.17,
total_particle_number = Bin.1 + Bin.2 + Bin.3 + Bin.4 + Bin.5 + Bin.6 + Bin.7 + Bin.8 + Bin.9 + Bin.10 + Bin.11 + Bin.12 + Bin.13 + Bin.14 + Bin.15 + Bin.16 + Bin.17) %>%
# Drop the experiment metadata so it is not duplicated by the join on date.
dplyr::select( -variation, -exp, -source)
# Combine sensor, OPS and nanotracer data; only timestamps present in all
# three tables survive the two inner joins.
df_joined <- inner_join(
  inner_join(ungroup(df_sensors_ft), df_ops_ft, by = "date"),
  df_nanotracer_ft,
  by = "date"
)
head(df_joined)
# Keep the "Peaks" rows and encode two predictors numerically:
#   rh     - percentage found in the experiment label; any label without a
#            54/69/72/76 % marker falls back to 79
#   source - 1 for "Incense", 0 for every other source
df_joined_ft <- df_joined %>%
  #dplyr::filter(exp != "") %>%
  dplyr::filter(variation == "Peaks") %>%
  mutate(
    rh = dplyr::case_when(
      grepl("54%", exp) ~ 54,
      grepl("69%", exp) ~ 69,
      grepl("72%", exp) ~ 72,
      grepl("76%", exp) ~ 76,
      TRUE ~ 79
    ),
    source = as.numeric(source == "Incense")
  )
# Append a size label (in um) to each OPS bin column name.
df_joined_ft <- dplyr::rename(
  df_joined_ft,
  Bin.1_0.3um = Bin.1, Bin.2_0.374um = Bin.2, Bin.3_0.465um = Bin.3,
  Bin.4_0.579um = Bin.4, Bin.5_0.721um = Bin.5, Bin.6_0.897um = Bin.6,
  Bin.7_1.117um = Bin.7, Bin.8_1.391um = Bin.8, Bin.9_1.732um = Bin.9,
  Bin.10_2.156um = Bin.10, Bin.11_2.685um = Bin.11, Bin.12_3.343um = Bin.12,
  Bin.13_4.162um = Bin.13, Bin.14_5.182um = Bin.14, Bin.15_6.451um = Bin.15,
  Bin.16_8.031um = Bin.16, Bin.17_10um = Bin.17
)
# Default model formula: sensor count channel against the reference predictors.
frmla <- gr03um ~ source + lw08um + N.1. + total_particle_number + rh
#frmla <- bias ~ source + lw08um + N.1. + total_particle_number + rh
#frmla_inter <- bias ~ (source + lw08um + N.1. + total_particle_number + rh )^2
# library() stops immediately if caret is missing; require() would only warn
# and let the script fail later at the first caret call.
library(caret)
# Resampling scheme for train(): 10-fold cross-validation repeated 10 times,
# with a grid search over the tuning parameters.
trControl <- trainControl(
  method = "repeatedcv",
  number = 10,
  search = "grid",
  repeats = 10
)
# Register a PSOCK cluster as the foreach backend.
library(doParallel)
# detectCores() can return NA, and "cores - 1" is 0 on a single-core machine;
# either value would make makePSOCKcluster() fail, so keep at least one worker.
no_cores <- max(1L, parallel::detectCores() - 1L, na.rm = TRUE)
cluster <- makePSOCKcluster(no_cores)
registerDoParallel(cluster)
# NOTE(review): no stopCluster(cluster) is visible in this part of the
# notebook -- make sure the workers are released once the analysis is done.
# Boruta feature selection, target `gr03um`, rows with sensor_type "PMS5003".
frmla <- gr03um ~ source + lw08um + N.1. + total_particle_number + rh
boruta_output <- Boruta(
  frmla,
  data = dplyr::filter(df_joined_ft, sensor_type == "PMS5003"),
  doTrace = 0
)
boruta_output
## Boruta performed 9 iterations in 0.804271 secs.
## 5 attributes confirmed important: lw08um, N.1., rh, source,
## total_particle_number;
## No attributes deemed unimportant.
# Resolve any tentative attributes and list the ones that were selected.
roughFixMod <- TentativeRoughFix(boruta_output)
boruta_signif <- getSelectedAttributes(roughFixMod)
print(boruta_signif)
## [1] "rh" "total_particle_number" "N.1."
## [4] "lw08um" "source"
# Importance statistics of the non-rejected attributes, largest mean first.
imps <- attStats(roughFixMod)
imps2 <- imps[imps$decision != "Rejected", c("meanImp", "decision")]
head(imps2[order(-imps2$meanImp), ])
plot(boruta_output, cex.axis = .7, las = 2, xlab = "",
     main = "Variable Importance - gr03um")
# Boruta feature selection, target `gr05um`, rows with sensor_type "PMS5003".
frmla <- gr05um ~ source + lw08um + N.1. + total_particle_number + rh
boruta_output <- Boruta(
  frmla,
  data = dplyr::filter(df_joined_ft, sensor_type == "PMS5003"),
  doTrace = 0
)
boruta_output
## Boruta performed 9 iterations in 0.53075 secs.
## 5 attributes confirmed important: lw08um, N.1., rh, source,
## total_particle_number;
## No attributes deemed unimportant.
# Resolve any tentative attributes and list the ones that were selected.
roughFixMod <- TentativeRoughFix(boruta_output)
boruta_signif <- getSelectedAttributes(roughFixMod)
print(boruta_signif)
## [1] "rh" "total_particle_number" "N.1."
## [4] "lw08um" "source"
# Importance statistics of the non-rejected attributes, largest mean first.
imps <- attStats(roughFixMod)
imps2 <- imps[imps$decision != "Rejected", c("meanImp", "decision")]
head(imps2[order(-imps2$meanImp), ])
plot(boruta_output, cex.axis = .7, las = 2, xlab = "",
     main = "Variable Importance - gr05um")
# Boruta feature selection, target `gr10um`, rows with sensor_type "PMS5003".
frmla <- gr10um ~ source + lw08um + N.1. + total_particle_number + rh
boruta_output <- Boruta(
  frmla,
  data = dplyr::filter(df_joined_ft, sensor_type == "PMS5003"),
  doTrace = 0
)
boruta_output
## Boruta performed 9 iterations in 0.5618711 secs.
## 5 attributes confirmed important: lw08um, N.1., rh, source,
## total_particle_number;
## No attributes deemed unimportant.
# Resolve any tentative attributes and list the ones that were selected.
roughFixMod <- TentativeRoughFix(boruta_output)
boruta_signif <- getSelectedAttributes(roughFixMod)
print(boruta_signif)
## [1] "rh" "total_particle_number" "N.1."
## [4] "lw08um" "source"
# Importance statistics of the non-rejected attributes, largest mean first.
imps <- attStats(roughFixMod)
imps2 <- imps[imps$decision != "Rejected", c("meanImp", "decision")]
head(imps2[order(-imps2$meanImp), ])
plot(boruta_output, cex.axis = .7, las = 2, xlab = "",
     main = "Variable Importance - gr10um")
# Boruta feature selection, target `gr25um`, rows with sensor_type "PMS5003".
frmla <- gr25um ~ source + lw08um + N.1. + total_particle_number + rh
boruta_output <- Boruta(
  frmla,
  data = dplyr::filter(df_joined_ft, sensor_type == "PMS5003"),
  doTrace = 0
)
boruta_output
## Boruta performed 9 iterations in 0.4402349 secs.
## 5 attributes confirmed important: lw08um, N.1., rh, source,
## total_particle_number;
## No attributes deemed unimportant.
# Resolve any tentative attributes and list the ones that were selected.
roughFixMod <- TentativeRoughFix(boruta_output)
boruta_signif <- getSelectedAttributes(roughFixMod)
print(boruta_signif)
## [1] "rh" "total_particle_number" "N.1."
## [4] "lw08um" "source"
# Importance statistics of the non-rejected attributes, largest mean first.
imps <- attStats(roughFixMod)
imps2 <- imps[imps$decision != "Rejected", c("meanImp", "decision")]
head(imps2[order(-imps2$meanImp), ])
plot(boruta_output, cex.axis = .7, las = 2, xlab = "",
     main = "Variable Importance - gr25um")
# Boruta feature selection, target `n05`, rows with sensor_type "SPS030".
frmla <- n05 ~ source + lw08um + N.1. + total_particle_number + rh
boruta_output <- Boruta(
  frmla,
  data = dplyr::filter(df_joined_ft, sensor_type == "SPS030"),
  doTrace = 0
)
boruta_output
## Boruta performed 9 iterations in 0.5585799 secs.
## 5 attributes confirmed important: lw08um, N.1., rh, source,
## total_particle_number;
## No attributes deemed unimportant.
# Resolve any tentative attributes and list the ones that were selected.
roughFixMod <- TentativeRoughFix(boruta_output)
boruta_signif <- getSelectedAttributes(roughFixMod)
print(boruta_signif)
## [1] "rh" "total_particle_number" "N.1."
## [4] "lw08um" "source"
# Importance statistics of the non-rejected attributes, largest mean first.
imps <- attStats(roughFixMod)
imps2 <- imps[imps$decision != "Rejected", c("meanImp", "decision")]
head(imps2[order(-imps2$meanImp), ])
plot(boruta_output, cex.axis = .7, las = 2, xlab = "",
     main = "Variable Importance - n05")
# Boruta feature selection, target `n1`, rows with sensor_type "SPS030".
frmla <- n1 ~ source + lw08um + N.1. + total_particle_number + rh
boruta_output <- Boruta(
  frmla,
  data = dplyr::filter(df_joined_ft, sensor_type == "SPS030"),
  doTrace = 0
)
boruta_output
## Boruta performed 9 iterations in 0.5134611 secs.
## 5 attributes confirmed important: lw08um, N.1., rh, source,
## total_particle_number;
## No attributes deemed unimportant.
# Resolve any tentative attributes and list the ones that were selected.
roughFixMod <- TentativeRoughFix(boruta_output)
boruta_signif <- getSelectedAttributes(roughFixMod)
print(boruta_signif)
## [1] "rh" "total_particle_number" "N.1."
## [4] "lw08um" "source"
# Importance statistics of the non-rejected attributes, largest mean first.
imps <- attStats(roughFixMod)
imps2 <- imps[imps$decision != "Rejected", c("meanImp", "decision")]
head(imps2[order(-imps2$meanImp), ])
plot(boruta_output, cex.axis = .7, las = 2, xlab = "",
     main = "Variable Importance - n1")
# Boruta feature selection, target `n25`, rows with sensor_type "SPS030".
frmla <- n25 ~ source + lw08um + N.1. + total_particle_number + rh
boruta_output <- Boruta(
  frmla,
  data = dplyr::filter(df_joined_ft, sensor_type == "SPS030"),
  doTrace = 0
)
boruta_output
## Boruta performed 9 iterations in 0.5500081 secs.
## 5 attributes confirmed important: lw08um, N.1., rh, source,
## total_particle_number;
## No attributes deemed unimportant.
# Resolve any tentative attributes and list the ones that were selected.
roughFixMod <- TentativeRoughFix(boruta_output)
boruta_signif <- getSelectedAttributes(roughFixMod)
print(boruta_signif)
## [1] "rh" "total_particle_number" "N.1."
## [4] "lw08um" "source"
# Importance statistics of the non-rejected attributes, largest mean first.
imps <- attStats(roughFixMod)
imps2 <- imps[imps$decision != "Rejected", c("meanImp", "decision")]
head(imps2[order(-imps2$meanImp), ])
plot(boruta_output, cex.axis = .7, las = 2, xlab = "",
     main = "Variable Importance - n25")
# Boruta feature selection, target `Bin0`, rows with sensor_type "OPCR1".
frmla <- Bin0 ~ source + lw08um + N.1. + total_particle_number + rh
boruta_output <- Boruta(
  frmla,
  data = dplyr::filter(df_joined_ft, sensor_type == "OPCR1"),
  doTrace = 0
)
boruta_output
## Boruta performed 9 iterations in 0.5618341 secs.
## 5 attributes confirmed important: lw08um, N.1., rh, source,
## total_particle_number;
## No attributes deemed unimportant.
# Resolve any tentative attributes and list the ones that were selected.
roughFixMod <- TentativeRoughFix(boruta_output)
boruta_signif <- getSelectedAttributes(roughFixMod)
print(boruta_signif)
## [1] "rh" "total_particle_number" "N.1."
## [4] "lw08um" "source"
# Importance statistics of the non-rejected attributes, largest mean first.
imps <- attStats(roughFixMod)
imps2 <- imps[imps$decision != "Rejected", c("meanImp", "decision")]
head(imps2[order(-imps2$meanImp), ])
plot(boruta_output, cex.axis = .7, las = 2, xlab = "",
     main = "Variable Importance - bin0")
# Boruta feature selection, target `Bin1`, rows with sensor_type "OPCR1".
frmla <- Bin1 ~ source + lw08um + N.1. + total_particle_number + rh
boruta_output <- Boruta(
  frmla,
  data = dplyr::filter(df_joined_ft, sensor_type == "OPCR1"),
  doTrace = 0
)
boruta_output
## Boruta performed 9 iterations in 0.5594571 secs.
## 5 attributes confirmed important: lw08um, N.1., rh, source,
## total_particle_number;
## No attributes deemed unimportant.
# Resolve any tentative attributes and list the ones that were selected.
roughFixMod <- TentativeRoughFix(boruta_output)
boruta_signif <- getSelectedAttributes(roughFixMod)
print(boruta_signif)
## [1] "rh" "total_particle_number" "N.1."
## [4] "lw08um" "source"
# Importance statistics of the non-rejected attributes, largest mean first.
imps <- attStats(roughFixMod)
imps2 <- imps[imps$decision != "Rejected", c("meanImp", "decision")]
head(imps2[order(-imps2$meanImp), ])
plot(boruta_output, cex.axis = .7, las = 2, xlab = "",
     main = "Variable Importance - bin1")
# Boruta feature selection, target `Bin2`, rows with sensor_type "OPCR1".
frmla <- Bin2 ~ source + lw08um + N.1. + total_particle_number + rh
boruta_output <- Boruta(
  frmla,
  data = dplyr::filter(df_joined_ft, sensor_type == "OPCR1"),
  doTrace = 0
)
boruta_output
## Boruta performed 9 iterations in 0.5615249 secs.
## 5 attributes confirmed important: lw08um, N.1., rh, source,
## total_particle_number;
## No attributes deemed unimportant.
# Resolve any tentative attributes and list the ones that were selected.
roughFixMod <- TentativeRoughFix(boruta_output)
boruta_signif <- getSelectedAttributes(roughFixMod)
print(boruta_signif)
## [1] "rh" "total_particle_number" "N.1."
## [4] "lw08um" "source"
# Importance statistics of the non-rejected attributes, largest mean first.
imps <- attStats(roughFixMod)
imps2 <- imps[imps$decision != "Rejected", c("meanImp", "decision")]
head(imps2[order(-imps2$meanImp), ])
plot(boruta_output, cex.axis = .7, las = 2, xlab = "",
     main = "Variable Importance - bin2")
# Boruta feature selection, target `Bin3`, rows with sensor_type "OPCR1".
frmla <- Bin3 ~ source + lw08um + N.1. + total_particle_number + rh
boruta_output <- Boruta(
  frmla,
  data = dplyr::filter(df_joined_ft, sensor_type == "OPCR1"),
  doTrace = 0
)
boruta_output
## Boruta performed 9 iterations in 0.5959251 secs.
## 5 attributes confirmed important: lw08um, N.1., rh, source,
## total_particle_number;
## No attributes deemed unimportant.
# Resolve any tentative attributes and list the ones that were selected.
roughFixMod <- TentativeRoughFix(boruta_output)
boruta_signif <- getSelectedAttributes(roughFixMod)
print(boruta_signif)
## [1] "rh" "total_particle_number" "N.1."
## [4] "lw08um" "source"
# Importance statistics of the non-rejected attributes, largest mean first.
imps <- attStats(roughFixMod)
imps2 <- imps[imps$decision != "Rejected", c("meanImp", "decision")]
head(imps2[order(-imps2$meanImp), ])
plot(boruta_output, cex.axis = .7, las = 2, xlab = "",
     main = "Variable Importance - bin3")
# Boruta feature selection, target `Bin4`, rows with sensor_type "OPCR1".
frmla <- Bin4 ~ source + lw08um + N.1. + total_particle_number + rh
boruta_output <- Boruta(
  frmla,
  data = dplyr::filter(df_joined_ft, sensor_type == "OPCR1"),
  doTrace = 0
)
boruta_output
## Boruta performed 9 iterations in 0.4809341 secs.
## 5 attributes confirmed important: lw08um, N.1., rh, source,
## total_particle_number;
## No attributes deemed unimportant.
# Resolve any tentative attributes and list the ones that were selected.
roughFixMod <- TentativeRoughFix(boruta_output)
boruta_signif <- getSelectedAttributes(roughFixMod)
print(boruta_signif)
## [1] "rh" "total_particle_number" "N.1."
## [4] "lw08um" "source"
# Importance statistics of the non-rejected attributes, largest mean first.
imps <- attStats(roughFixMod)
imps2 <- imps[imps$decision != "Rejected", c("meanImp", "decision")]
head(imps2[order(-imps2$meanImp), ])
plot(boruta_output, cex.axis = .7, las = 2, xlab = "",
     main = "Variable Importance - bin4")
# Recursive feature elimination with a radial SVM, target `gr03um`,
# rows with sensor_type "PMS5003".
frmla <-
  gr03um ~ source + lw08um + N.1. + total_particle_number + rh
subsets <- c(1:6)
x <- df_joined_ft %>%
  dplyr::filter(sensor_type == "PMS5003") %>%
  dplyr::select(source, lw08um, total_particle_number, rh, N.1., gr03um)
# NOTE(review): rfeControl() gets method = "repeatedcv" but no `repeats`;
# the recorded output below reports "repeated 1 times" -- confirm intended.
svmProfile <- rfe(
  gr03um ~ .,
  data = x,
  sizes = 1:6,
  rfeControl = rfeControl(
    functions = caretFuncs,
    number = 10,
    method = "repeatedcv"
  ),
  ## pass options to train()
  method = "svmRadial"
)
svmProfile
##
## Recursive feature selection
##
## Outer resampling method: Cross-Validated (10 fold, repeated 1 times)
##
## Resampling performance over subset size:
##
## Variables RMSE Rsquared MAE RMSESD RsquaredSD MAESD Selected
## 1 909.4 0.5705 601.2 111.05 0.09217 58.41
## 2 861.5 0.6052 569.7 109.15 0.09635 64.31
## 3 711.6 0.7356 473.0 86.43 0.06754 50.23
## 4 673.9 0.7559 453.6 91.66 0.07884 42.26
## 5 579.8 0.8160 401.0 87.42 0.05513 44.21 *
##
## The top 5 variables (out of 5):
## lw08um, total_particle_number, N.1., source, rh
predictors(svmProfile)
## [1] "lw08um" "total_particle_number" "N.1."
## [4] "source" "rh"
varImp(svmProfile)
# Mean importance per variable over all resamples. The column is named
# explicitly so its header is not the raw expression text.
data.frame(
  Overall = svmProfile$variables$Overall,
  var = svmProfile$variables$var
) %>%
  group_by(var) %>%
  summarise(mean_overall = mean(Overall, na.rm = TRUE))
# Recursive feature elimination with a radial SVM, target `gr05um`,
# rows with sensor_type "PMS5003".
frmla <-
  gr05um ~ source + lw08um + N.1. + total_particle_number + rh
x <- df_joined_ft %>%
  dplyr::filter(sensor_type == "PMS5003") %>%
  dplyr::select(source, lw08um, total_particle_number, rh, N.1., gr05um)
# NOTE(review): rfeControl() gets method = "repeatedcv" but no `repeats`;
# the recorded output below reports "repeated 1 times" -- confirm intended.
svmProfile <- rfe(
  gr05um ~ .,
  data = x,
  sizes = 1:6,
  rfeControl = rfeControl(
    functions = caretFuncs,
    number = 10,
    method = "repeatedcv"
  ),
  ## pass options to train()
  method = "svmRadial"
)
svmProfile
##
## Recursive feature selection
##
## Outer resampling method: Cross-Validated (10 fold, repeated 1 times)
##
## Resampling performance over subset size:
##
## Variables RMSE Rsquared MAE RMSESD RsquaredSD MAESD Selected
## 1 252.9 0.5793 168.6 34.77 0.09532 21.60
## 2 242.5 0.6124 161.9 38.43 0.07686 23.43
## 3 205.8 0.7228 136.6 38.46 0.06195 22.35
## 4 190.5 0.7611 129.4 37.02 0.04556 20.24
## 5 159.7 0.8302 111.8 27.40 0.02869 16.66 *
##
## The top 5 variables (out of 5):
## lw08um, total_particle_number, N.1., source, rh
predictors(svmProfile)
## [1] "lw08um" "total_particle_number" "N.1."
## [4] "source" "rh"
varImp(svmProfile)
# Mean importance per variable over all resamples. The column is named
# explicitly so its header is not the raw expression text.
data.frame(
  Overall = svmProfile$variables$Overall,
  var = svmProfile$variables$var
) %>%
  group_by(var) %>%
  summarise(mean_overall = mean(Overall, na.rm = TRUE))
# Recursive feature elimination with a radial SVM, target `gr10um`,
# rows with sensor_type "PMS5003".
frmla <- gr10um ~ source + lw08um + N.1. + total_particle_number + rh
x <- df_joined_ft %>%
  dplyr::filter(sensor_type == "PMS5003") %>%
  dplyr::select(source, lw08um, total_particle_number, rh, N.1., gr10um)
# NOTE(review): rfeControl() gets method = "repeatedcv" but no `repeats`;
# the recorded output below reports "repeated 1 times" -- confirm intended.
svmProfile <- rfe(
  gr10um ~ .,
  data = x,
  sizes = 1:6,
  rfeControl = rfeControl(
    functions = caretFuncs,
    number = 10,
    method = "repeatedcv"
  ),
  ## pass options to train()
  method = "svmRadial"
)
svmProfile
##
## Recursive feature selection
##
## Outer resampling method: Cross-Validated (10 fold, repeated 1 times)
##
## Resampling performance over subset size:
##
## Variables RMSE Rsquared MAE RMSESD RsquaredSD MAESD Selected
## 1 44.70 0.6841 31.07 6.787 0.08745 3.269
## 2 44.21 0.6788 30.72 6.665 0.10598 3.792
## 3 47.37 0.6366 31.47 9.427 0.12995 4.139
## 4 45.76 0.6729 30.53 10.368 0.11254 3.433
## 5 39.50 0.7513 27.31 7.697 0.06212 2.334 *
##
## The top 5 variables (out of 5):
## total_particle_number, lw08um, N.1., source, rh
predictors(svmProfile)
## [1] "total_particle_number" "lw08um" "N.1."
## [4] "source" "rh"
varImp(svmProfile)
# Mean importance per variable over all resamples. The column is named
# explicitly so its header is not the raw expression text.
data.frame(
  Overall = svmProfile$variables$Overall,
  var = svmProfile$variables$var
) %>%
  group_by(var) %>%
  summarise(mean_overall = mean(Overall, na.rm = TRUE))
# Recursive feature elimination with a radial SVM, target `gr25um`,
# rows with sensor_type "PMS5003".
frmla <- gr25um ~ source + lw08um + N.1. + total_particle_number + rh
x <- df_joined_ft %>%
  dplyr::filter(sensor_type == "PMS5003") %>%
  dplyr::select(source, lw08um, total_particle_number, rh, N.1., gr25um)
# NOTE(review): rfeControl() gets method = "repeatedcv" but no `repeats`;
# the recorded output below reports "repeated 1 times" -- confirm intended.
svmProfile <- rfe(
  gr25um ~ .,
  data = x,
  sizes = 1:6,
  rfeControl = rfeControl(
    functions = caretFuncs,
    number = 10,
    method = "repeatedcv"
  ),
  ## pass options to train()
  method = "svmRadial"
)
svmProfile
##
## Recursive feature selection
##
## Outer resampling method: Cross-Validated (10 fold, repeated 1 times)
##
## Resampling performance over subset size:
##
## Variables RMSE Rsquared MAE RMSESD RsquaredSD MAESD Selected
## 1 8.123 0.4875 5.534 2.238 0.13557 1.1442
## 2 6.728 0.6531 4.524 2.183 0.09998 0.8110
## 3 6.345 0.6770 4.284 1.871 0.09933 0.7271
## 4 7.115 0.5921 4.630 2.212 0.08658 0.7146
## 5 6.033 0.7094 3.997 2.049 0.08132 0.7526 *
##
## The top 5 variables (out of 5):
## total_particle_number, lw08um, source, N.1., rh
predictors(svmProfile)
## [1] "total_particle_number" "lw08um" "source"
## [4] "N.1." "rh"
varImp(svmProfile)
# Mean importance per variable over all resamples. The column is named
# explicitly so its header is not the raw expression text.
data.frame(
  Overall = svmProfile$variables$Overall,
  var = svmProfile$variables$var
) %>%
  group_by(var) %>%
  summarise(mean_overall = mean(Overall, na.rm = TRUE))
# Recursive feature elimination with a radial SVM, target `n05`,
# rows with sensor_type "SPS030".
frmla <- n05 ~ source + lw08um + N.1. + total_particle_number + rh
subsets <- c(1:6)
x <- df_joined_ft %>%
  dplyr::filter(sensor_type == "SPS030") %>%
  dplyr::select(source, lw08um, total_particle_number, rh, N.1., n05)
# NOTE(review): rfeControl() gets method = "repeatedcv" but no `repeats`;
# the recorded output below reports "repeated 1 times" -- confirm intended.
svmProfile <- rfe(
  n05 ~ .,
  data = x,
  sizes = 1:6,
  rfeControl = rfeControl(
    functions = caretFuncs,
    number = 10,
    method = "repeatedcv"
  ),
  ## pass options to train()
  method = "svmRadial"
)
svmProfile
##
## Recursive feature selection
##
## Outer resampling method: Cross-Validated (10 fold, repeated 1 times)
##
## Resampling performance over subset size:
##
## Variables RMSE Rsquared MAE RMSESD RsquaredSD MAESD Selected
## 1 23.02 0.6830 16.18 4.815 0.08228 2.280
## 2 22.37 0.6992 15.99 4.561 0.07431 2.299
## 3 25.05 0.6296 17.19 5.626 0.07987 2.530
## 4 24.27 0.6522 16.64 6.074 0.08615 2.567
## 5 20.31 0.7593 14.25 3.894 0.03027 1.602 *
##
## The top 5 variables (out of 5):
## lw08um, total_particle_number, N.1., source, rh
predictors(svmProfile)
## [1] "lw08um" "total_particle_number" "N.1."
## [4] "source" "rh"
varImp(svmProfile)
# Mean importance per variable over all resamples. The column is named
# explicitly so its header is not the raw expression text.
data.frame(
  Overall = svmProfile$variables$Overall,
  var = svmProfile$variables$var
) %>%
  group_by(var) %>%
  summarise(mean_overall = mean(Overall, na.rm = TRUE))
# Recursive feature elimination with a radial SVM, target `n1`,
# rows with sensor_type "SPS030".
frmla <- n1 ~ source + lw08um + N.1. + total_particle_number + rh
x <- df_joined_ft %>%
  dplyr::filter(sensor_type == "SPS030") %>%
  dplyr::select(source, lw08um, total_particle_number, rh, N.1., n1)
# NOTE(review): rfeControl() gets method = "repeatedcv" but no `repeats`;
# the recorded output below reports "repeated 1 times" -- confirm intended.
svmProfile <- rfe(
  n1 ~ .,
  data = x,
  sizes = 1:6,
  rfeControl = rfeControl(
    functions = caretFuncs,
    number = 10,
    method = "repeatedcv"
  ),
  ## pass options to train()
  method = "svmRadial"
)
svmProfile
##
## Recursive feature selection
##
## Outer resampling method: Cross-Validated (10 fold, repeated 1 times)
##
## Resampling performance over subset size:
##
## Variables RMSE Rsquared MAE RMSESD RsquaredSD MAESD Selected
## 1 26.92 0.6883 19.84 4.309 0.08547 2.224
## 2 27.52 0.6791 19.82 5.526 0.11932 3.062
## 3 30.54 0.6017 21.15 4.754 0.10714 2.138
## 4 29.30 0.6270 20.33 5.689 0.12256 2.645
## 5 24.46 0.7479 17.48 4.418 0.07750 2.140 *
##
## The top 5 variables (out of 5):
## total_particle_number, lw08um, N.1., source, rh
predictors(svmProfile)
## [1] "total_particle_number" "lw08um" "N.1."
## [4] "source" "rh"
varImp(svmProfile)
# Mean importance per variable over all resamples. The column is named
# explicitly so its header is not the raw expression text.
data.frame(
  Overall = svmProfile$variables$Overall,
  var = svmProfile$variables$var
) %>%
  group_by(var) %>%
  summarise(mean_overall = mean(Overall, na.rm = TRUE))
# Recursive feature elimination with a radial SVM, target `n25`,
# rows with sensor_type "SPS030".
frmla <- n25 ~ source + lw08um + N.1. + total_particle_number + rh
x <- df_joined_ft %>%
  dplyr::filter(sensor_type == "SPS030") %>%
  dplyr::select(source, lw08um, total_particle_number, rh, N.1., n25)
# NOTE(review): rfeControl() gets method = "repeatedcv" but no `repeats`;
# the recorded output below reports "repeated 1 times" -- confirm intended.
svmProfile <- rfe(
  n25 ~ .,
  data = x,
  sizes = 1:6,
  rfeControl = rfeControl(
    functions = caretFuncs,
    number = 10,
    method = "repeatedcv"
  ),
  ## pass options to train()
  method = "svmRadial"
)
svmProfile
##
## Recursive feature selection
##
## Outer resampling method: Cross-Validated (10 fold, repeated 1 times)
##
## Resampling performance over subset size:
##
## Variables RMSE Rsquared MAE RMSESD RsquaredSD MAESD Selected
## 1 28.83 0.6525 20.80 3.519 0.10302 1.345
## 2 29.34 0.6395 21.00 4.177 0.08297 1.652
## 3 31.59 0.5908 22.06 5.618 0.10668 2.401
## 4 28.51 0.6731 19.77 6.365 0.07988 3.511
## 5 26.61 0.7130 19.06 4.344 0.07928 2.171 *
##
## The top 5 variables (out of 5):
## total_particle_number, lw08um, N.1., rh, source
predictors(svmProfile)
## [1] "total_particle_number" "lw08um" "N.1."
## [4] "rh" "source"
varImp(svmProfile)
# Mean importance per variable over all resamples. The column is named
# explicitly so its header is not the raw expression text.
data.frame(
  Overall = svmProfile$variables$Overall,
  var = svmProfile$variables$var
) %>%
  group_by(var) %>%
  summarise(mean_overall = mean(Overall, na.rm = TRUE))
# Recursive feature elimination with a radial SVM, target `Bin0`,
# rows with sensor_type "OPCR1".
frmla <- Bin0 ~ source + lw08um + N.1. + total_particle_number + rh
subsets <- c(1:6)
x <- df_joined_ft %>%
  dplyr::filter(sensor_type == "OPCR1") %>%
  dplyr::select(source, lw08um, total_particle_number, rh, N.1., Bin0)
# NOTE(review): rfeControl() gets method = "repeatedcv" but no `repeats`;
# the recorded output below reports "repeated 1 times" -- confirm intended.
svmProfile <- rfe(
  Bin0 ~ .,
  data = x,
  sizes = 1:6,
  rfeControl = rfeControl(
    functions = caretFuncs,
    number = 10,
    method = "repeatedcv"
  ),
  ## pass options to train()
  method = "svmRadial"
)
svmProfile
##
## Recursive feature selection
##
## Outer resampling method: Cross-Validated (10 fold, repeated 1 times)
##
## Resampling performance over subset size:
##
## Variables RMSE Rsquared MAE RMSESD RsquaredSD MAESD Selected
## 1 202.7 0.5574 129.26 45.02 0.08475 19.10
## 2 166.0 0.7200 93.81 49.24 0.07139 16.42
## 3 180.8 0.6569 100.09 63.45 0.13875 19.22
## 4 168.1 0.7024 92.32 65.96 0.13918 17.79
## 5 141.3 0.8013 80.83 49.46 0.07384 14.76 *
##
## The top 5 variables (out of 5):
## total_particle_number, lw08um, N.1., source, rh
predictors(svmProfile)
## [1] "total_particle_number" "lw08um" "N.1."
## [4] "source" "rh"
varImp(svmProfile)
# Mean importance per variable over all resamples. The column is named
# explicitly so its header is not the raw expression text.
data.frame(
  Overall = svmProfile$variables$Overall,
  var = svmProfile$variables$var
) %>%
  group_by(var) %>%
  summarise(mean_overall = mean(Overall, na.rm = TRUE))
# Recursive feature elimination with a radial SVM, target `Bin1`,
# rows with sensor_type "OPCR1".
frmla <- Bin1 ~ source + lw08um + N.1. + total_particle_number + rh
x <- df_joined_ft %>%
  dplyr::filter(sensor_type == "OPCR1") %>%
  dplyr::select(source, lw08um, total_particle_number, rh, N.1., Bin1)
# NOTE(review): rfeControl() gets method = "repeatedcv" but no `repeats`;
# the recorded output below reports "repeated 1 times" -- confirm intended.
svmProfile <- rfe(
  Bin1 ~ .,
  data = x,
  sizes = 1:6,
  rfeControl = rfeControl(
    functions = caretFuncs,
    number = 10,
    method = "repeatedcv"
  ),
  ## pass options to train()
  method = "svmRadial"
)
svmProfile
##
## Recursive feature selection
##
## Outer resampling method: Cross-Validated (10 fold, repeated 1 times)
##
## Resampling performance over subset size:
##
## Variables RMSE Rsquared MAE RMSESD RsquaredSD MAESD Selected
## 1 365.3 0.1607 221.63 87.99 0.09025 44.31
## 2 203.8 0.7492 97.96 88.77 0.13251 26.16
## 3 186.7 0.7877 91.44 90.92 0.14621 24.45
## 4 159.4 0.8564 83.61 68.53 0.06941 23.57 *
## 5 176.7 0.8171 84.52 77.23 0.09884 20.09
##
## The top 4 variables (out of 4):
## total_particle_number, lw08um, source, rh
predictors(svmProfile)
## [1] "total_particle_number" "lw08um" "source"
## [4] "rh"
varImp(svmProfile)
# Mean importance per variable over all resamples. The column is named
# explicitly so its header is not the raw expression text.
data.frame(
  Overall = svmProfile$variables$Overall,
  var = svmProfile$variables$var
) %>%
  group_by(var) %>%
  summarise(mean_overall = mean(Overall, na.rm = TRUE))
# Recursive feature elimination with a radial SVM, target `Bin2`,
# rows with sensor_type "OPCR1".
frmla <- Bin2 ~ source + lw08um + N.1. + total_particle_number + rh
x <- df_joined_ft %>%
  dplyr::filter(sensor_type == "OPCR1") %>%
  dplyr::select(source, lw08um, total_particle_number, rh, N.1., Bin2)
# NOTE(review): rfeControl() gets method = "repeatedcv" but no `repeats`;
# the recorded output below reports "repeated 1 times" -- confirm intended.
svmProfile <- rfe(
  Bin2 ~ .,
  data = x,
  sizes = 1:6,
  rfeControl = rfeControl(
    functions = caretFuncs,
    number = 10,
    method = "repeatedcv"
  ),
  ## pass options to train()
  method = "svmRadial"
)
svmProfile
##
## Recursive feature selection
##
## Outer resampling method: Cross-Validated (10 fold, repeated 1 times)
##
## Resampling performance over subset size:
##
## Variables RMSE Rsquared MAE RMSESD RsquaredSD MAESD Selected
## 1 105.05 0.3727 55.01 26.35 0.06515 8.756
## 2 54.63 0.8308 27.04 19.54 0.06567 6.295
## 3 48.01 0.8742 24.57 15.53 0.03954 4.627
## 4 58.24 0.8066 29.20 17.51 0.06594 5.912
## 5 46.64 0.8723 25.83 12.58 0.07040 5.672 *
##
## The top 5 variables (out of 5):
## source, total_particle_number, lw08um, N.1., rh
predictors(svmProfile)
## [1] "source" "total_particle_number" "lw08um"
## [4] "N.1." "rh"
varImp(svmProfile)
# Mean importance per variable over all resamples. The column is named
# explicitly so its header is not the raw expression text.
data.frame(
  Overall = svmProfile$variables$Overall,
  var = svmProfile$variables$var
) %>%
  group_by(var) %>%
  summarise(mean_overall = mean(Overall, na.rm = TRUE))
# Recursive feature elimination with a radial SVM, target `Bin3`,
# rows with sensor_type "OPCR1".
frmla <- Bin3 ~ source + lw08um + N.1. + total_particle_number + rh
x <- df_joined_ft %>%
  dplyr::filter(sensor_type == "OPCR1") %>%
  dplyr::select(source, lw08um, total_particle_number, rh, N.1., Bin3)
# NOTE(review): rfeControl() gets method = "repeatedcv" but no `repeats`;
# the recorded output below reports "repeated 1 times" -- confirm intended.
svmProfile <- rfe(
  Bin3 ~ .,
  data = x,
  sizes = 1:6,
  rfeControl = rfeControl(
    functions = caretFuncs,
    number = 10,
    method = "repeatedcv"
  ),
  ## pass options to train()
  method = "svmRadial"
)
svmProfile
##
## Recursive feature selection
##
## Outer resampling method: Cross-Validated (10 fold, repeated 1 times)
##
## Resampling performance over subset size:
##
## Variables RMSE Rsquared MAE RMSESD RsquaredSD MAESD Selected
## 1 43.31 0.3501 22.07 9.315 0.06363 2.252
## 2 25.24 0.7698 12.00 9.679 0.10826 2.919
## 3 21.43 0.8440 10.70 8.383 0.07335 2.533
## 4 23.95 0.7966 11.89 9.286 0.09222 2.579
## 5 20.53 0.8525 11.00 6.735 0.05661 2.496 *
##
## The top 5 variables (out of 5):
## source, total_particle_number, lw08um, N.1., rh
predictors(svmProfile)
## [1] "source" "total_particle_number" "lw08um"
## [4] "N.1." "rh"
varImp(svmProfile)
# Mean importance per variable over all resamples. The column is named
# explicitly so its header is not the raw expression text.
data.frame(
  Overall = svmProfile$variables$Overall,
  var = svmProfile$variables$var
) %>%
  group_by(var) %>%
  summarise(mean_overall = mean(Overall, na.rm = TRUE))
# Recursive feature elimination with a radial SVM, target `Bin4`,
# rows with sensor_type "OPCR1".
frmla <- Bin4 ~ source + lw08um + N.1. + total_particle_number + rh
x <- df_joined_ft %>%
  dplyr::filter(sensor_type == "OPCR1") %>%
  dplyr::select(source, lw08um, total_particle_number, rh, N.1., Bin4)
# NOTE(review): rfeControl() gets method = "repeatedcv" but no `repeats`;
# the recorded output below reports "repeated 1 times" -- confirm intended.
svmProfile <- rfe(
  Bin4 ~ .,
  data = x,
  sizes = 1:6,
  rfeControl = rfeControl(
    functions = caretFuncs,
    number = 10,
    method = "repeatedcv"
  ),
  ## pass options to train()
  method = "svmRadial"
)
svmProfile
##
## Recursive feature selection
##
## Outer resampling method: Cross-Validated (10 fold, repeated 1 times)
##
## Resampling performance over subset size:
##
## Variables RMSE Rsquared MAE RMSESD RsquaredSD MAESD Selected
## 1 23.86 0.3637 12.503 7.634 0.09004 2.504
## 2 13.53 0.8004 6.672 4.938 0.06495 1.391
## 3 11.65 0.8612 5.877 4.719 0.05962 1.358 *
## 4 13.71 0.7936 6.761 5.527 0.07574 1.499
## 5 12.23 0.8364 6.328 3.878 0.06008 1.276
##
## The top 3 variables (out of 3):
## source, total_particle_number, lw08um
predictors(svmProfile)
## [1] "source" "total_particle_number" "lw08um"
varImp(svmProfile)
# Mean importance per variable over all resamples. The column is named
# explicitly so its header is not the raw expression text.
data.frame(
  Overall = svmProfile$variables$Overall,
  var = svmProfile$variables$var
) %>%
  group_by(var) %>%
  summarise(mean_overall = mean(Overall, na.rm = TRUE))
# Ridge regression via caret/glmnet, target `gr03um`, PMS5003 rows.
frmla <- gr03um ~ source + lw08um + N.1. + total_particle_number + rh
# 10-fold cross-validation repeated 10 times, grid search.
trControl <- trainControl(
  method = "repeatedcv",
  number = 10,
  search = "grid",
  repeats = 10
)
# alpha = 0 keeps glmnet on the pure ridge penalty; only lambda is tuned.
tuneGrid <- expand.grid(
  alpha = 0,
  lambda = seq(0.0001, 1, length = 100)
)
glmnet.fit <- train(
  frmla,
  # dplyr::filter() drops rows whose sensor_type is NA (logical `[` indexing
  # would keep them as all-NA rows) and matches the other sections.
  data = dplyr::filter(df_joined_ft, sensor_type == "PMS5003"),
  method = "glmnet",
  metric = "RMSE",
  preProcess = c("center", "scale"),
  trControl = trControl,
  tuneGrid = tuneGrid
)
glmnet.fit$bestTune
varImp(glmnet.fit)
## glmnet variable importance
##
## Overall
## N.1. 100.00
## lw08um 78.08
## total_particle_number 68.10
## source 31.22
## rh 0.00
# Ridge regression via caret/glmnet, target `gr05um`, PMS5003 rows.
# Uses the trControl and tuneGrid objects defined earlier in the script.
frmla <- gr05um ~ source + lw08um + N.1. + total_particle_number + rh
glmnet.fit <- train(
  frmla,
  # dplyr::filter() drops rows whose sensor_type is NA (logical `[` indexing
  # would keep them as all-NA rows) and matches the other sections.
  data = dplyr::filter(df_joined_ft, sensor_type == "PMS5003"),
  method = "glmnet",
  metric = "RMSE",
  preProcess = c("center", "scale"),
  trControl = trControl,
  tuneGrid = tuneGrid
)
glmnet.fit$bestTune
varImp(glmnet.fit)
## glmnet variable importance
##
## Overall
## N.1. 100.00
## lw08um 86.62
## total_particle_number 75.77
## source 32.35
## rh 0.00
# Ridge regression via caret/glmnet, target `gr10um`, PMS5003 rows.
# Uses the trControl and tuneGrid objects defined earlier in the script.
frmla <- gr10um ~ source + lw08um + N.1. + total_particle_number + rh
glmnet.fit <- train(
  frmla,
  # dplyr::filter() drops rows whose sensor_type is NA (logical `[` indexing
  # would keep them as all-NA rows) and matches the other sections.
  data = dplyr::filter(df_joined_ft, sensor_type == "PMS5003"),
  method = "glmnet",
  metric = "RMSE",
  preProcess = c("center", "scale"),
  trControl = trControl,
  tuneGrid = tuneGrid
)
glmnet.fit$bestTune
varImp(glmnet.fit)
## glmnet variable importance
##
## Overall
## total_particle_number 100.000
## lw08um 87.317
## N.1. 20.084
## source 9.541
## rh 0.000
# Ridge regression via caret/glmnet, target `gr25um`, PMS5003 rows.
# Uses the trControl and tuneGrid objects defined earlier in the script.
frmla <- gr25um ~ source + lw08um + N.1. + total_particle_number + rh
glmnet.fit <- train(
  frmla,
  # dplyr::filter() drops rows whose sensor_type is NA (logical `[` indexing
  # would keep them as all-NA rows) and matches the other sections.
  data = dplyr::filter(df_joined_ft, sensor_type == "PMS5003"),
  method = "glmnet",
  metric = "RMSE",
  preProcess = c("center", "scale"),
  trControl = trControl,
  tuneGrid = tuneGrid
)
glmnet.fit$bestTune
varImp(glmnet.fit)
## glmnet variable importance
##
## Overall
## total_particle_number 100.00
## source 70.56
## lw08um 41.69
## rh 18.09
## N.1. 0.00
# Ridge regression via caret/glmnet, target `n05`, SPS030 rows.
# Uses the trControl and tuneGrid objects defined earlier in the script.
frmla <- n05 ~ source + lw08um + N.1. + total_particle_number + rh
glmnet.fit <- train(
  frmla,
  # dplyr::filter() drops rows whose sensor_type is NA (logical `[` indexing
  # would keep them as all-NA rows) and matches the other sections.
  data = dplyr::filter(df_joined_ft, sensor_type == "SPS030"),
  method = "glmnet",
  metric = "RMSE",
  preProcess = c("center", "scale"),
  trControl = trControl,
  tuneGrid = tuneGrid
)
glmnet.fit$bestTune
varImp(glmnet.fit)
## glmnet variable importance
##
## Overall
## lw08um 100.000
## total_particle_number 99.445
## N.1. 26.235
## rh 8.059
## source 0.000
# Ridge regression via caret/glmnet, target `n1`, SPS030 rows.
# Uses the trControl and tuneGrid objects defined earlier in the script.
frmla <- n1 ~ source + lw08um + N.1. + total_particle_number + rh
glmnet.fit <- train(
  frmla,
  # dplyr::filter() drops rows whose sensor_type is NA (logical `[` indexing
  # would keep them as all-NA rows) and matches the other sections.
  data = dplyr::filter(df_joined_ft, sensor_type == "SPS030"),
  method = "glmnet",
  metric = "RMSE",
  preProcess = c("center", "scale"),
  trControl = trControl,
  tuneGrid = tuneGrid
)
glmnet.fit$bestTune
varImp(glmnet.fit)
## glmnet variable importance
##
## Overall
## total_particle_number 100.000
## lw08um 88.994
## N.1. 10.317
## rh 1.104
## source 0.000
# Ridge regression via caret/glmnet, target `n25`, SPS030 rows.
# Uses the trControl and tuneGrid objects defined earlier in the script.
frmla <- n25 ~ source + lw08um + N.1. + total_particle_number + rh
glmnet.fit <- train(
  frmla,
  # dplyr::filter() drops rows whose sensor_type is NA (logical `[` indexing
  # would keep them as all-NA rows) and matches the other sections.
  data = dplyr::filter(df_joined_ft, sensor_type == "SPS030"),
  method = "glmnet",
  metric = "RMSE",
  preProcess = c("center", "scale"),
  trControl = trControl,
  tuneGrid = tuneGrid
)
glmnet.fit$bestTune
varImp(glmnet.fit)
## glmnet variable importance
##
## Overall
## total_particle_number 100.000
## lw08um 82.450
## source 5.122
## N.1. 3.637
## rh 0.000
# Ridge regression via caret/glmnet, target `Bin0`, OPCR1 rows.
# Uses the trControl and tuneGrid objects defined earlier in the script.
frmla <- Bin0 ~ source + lw08um + N.1. + total_particle_number + rh
glmnet.fit <- train(
  frmla,
  # dplyr::filter() drops rows whose sensor_type is NA (logical `[` indexing
  # would keep them as all-NA rows) and matches the other sections.
  data = dplyr::filter(df_joined_ft, sensor_type == "OPCR1"),
  method = "glmnet",
  metric = "RMSE",
  preProcess = c("center", "scale"),
  trControl = trControl,
  tuneGrid = tuneGrid
)
glmnet.fit$bestTune
varImp(glmnet.fit)
## glmnet variable importance
##
## Overall
## total_particle_number 100.000
## source 56.552
## lw08um 47.453
## rh 3.423
## N.1. 0.000
# Ridge regression via caret/glmnet, target `Bin1`, OPCR1 rows.
# Uses the trControl and tuneGrid objects defined earlier in the script.
frmla <- Bin1 ~ source + lw08um + N.1. + total_particle_number + rh
glmnet.fit <- train(
  frmla,
  # dplyr::filter() drops rows whose sensor_type is NA (logical `[` indexing
  # would keep them as all-NA rows) and matches the other sections.
  data = dplyr::filter(df_joined_ft, sensor_type == "OPCR1"),
  method = "glmnet",
  metric = "RMSE",
  preProcess = c("center", "scale"),
  trControl = trControl,
  tuneGrid = tuneGrid
)
glmnet.fit$bestTune
varImp(glmnet.fit)
## glmnet variable importance
##
## Overall
## source 100.000
## total_particle_number 98.080
## rh 31.458
## N.1. 2.025
## lw08um 0.000
# Ridge regression via caret/glmnet, target `Bin2`, OPCR1 rows.
# Uses the trControl and tuneGrid objects defined earlier in the script.
frmla <- Bin2 ~ source + lw08um + N.1. + total_particle_number + rh
glmnet.fit <- train(
  frmla,
  # dplyr::filter() drops rows whose sensor_type is NA (logical `[` indexing
  # would keep them as all-NA rows) and matches the other sections.
  data = dplyr::filter(df_joined_ft, sensor_type == "OPCR1"),
  method = "glmnet",
  metric = "RMSE",
  preProcess = c("center", "scale"),
  trControl = trControl,
  tuneGrid = tuneGrid
)
glmnet.fit$bestTune
varImp(glmnet.fit)
## glmnet variable importance
##
## Overall
## source 100.00
## total_particle_number 91.27
## lw08um 20.15
## rh 15.16
## N.1. 0.00
# Ridge regression via caret/glmnet, target `Bin3`, OPCR1 rows.
# Uses the trControl and tuneGrid objects defined earlier in the script.
frmla <- Bin3 ~ source + lw08um + N.1. + total_particle_number + rh
glmnet.fit <- train(
  frmla,
  # dplyr::filter() drops rows whose sensor_type is NA (logical `[` indexing
  # would keep them as all-NA rows) and matches the other sections.
  data = dplyr::filter(df_joined_ft, sensor_type == "OPCR1"),
  method = "glmnet",
  metric = "RMSE",
  preProcess = c("center", "scale"),
  trControl = trControl,
  tuneGrid = tuneGrid
)
glmnet.fit$bestTune
varImp(glmnet.fit)
## glmnet variable importance
##
## Overall
## source 100.00
## total_particle_number 95.44
## lw08um 28.61
## rh 14.28
## N.1. 0.00
# Ridge regression via caret/glmnet, target `Bin4`, OPCR1 rows.
# Uses the trControl and tuneGrid objects defined earlier in the script.
frmla <- Bin4 ~ source + lw08um + N.1. + total_particle_number + rh
glmnet.fit <- train(
  frmla,
  # dplyr::filter() drops rows whose sensor_type is NA (logical `[` indexing
  # would keep them as all-NA rows) and matches the other sections.
  data = dplyr::filter(df_joined_ft, sensor_type == "OPCR1"),
  method = "glmnet",
  metric = "RMSE",
  preProcess = c("center", "scale"),
  trControl = trControl,
  tuneGrid = tuneGrid
)
glmnet.fit$bestTune
varImp(glmnet.fit)
## glmnet variable importance
##
## Overall
## source 100.00
## total_particle_number 98.62
## lw08um 31.89
## rh 10.37
## N.1. 0.00