У меня есть датафрейм (data frame), в котором все столбцы имеют символьный тип, например:
# Example data: every column is created from strings (numeric values are
# passed through as.character()), so the data frame mimics a table whose
# columns were all read in as text.
ID <- c("A", "A", "A", "A", "A", "A", "A", "A",
        "B", "B", "B", "B", "B", "B", "B", "B")
ToolID <- c("CCP_A", "CCP_A", "CCQ_A", "CCQ_A", "IOT_B", "CCP_B", "CCQ_B", "IOT_B",
            "CCP_A", "CCP_A", "CCQ_A", "CCQ_A", "IOT_B", "CCP_B", "CCQ_B", "IOT_B")
Step <- c("Step_A", "Step_A", "Step_B", "Step_C", "Step_D", "Step_D", "Step_E", "Step_F",
          "Step_A", "Step_A", "Step_B", "Step_C", "Step_D", "Step_D", "Step_E", "Step_F")
Measurement <- c("Length", "Breadth", "Width", "Height", NA, NA, NA, NA,
                 "Length", "Breadth", "Width", "Height", NA, NA, NA, NA)
Passfail <- c("Pass", "Pass", "Fail", "Fail", "Pass", "Pass", "Pass", "Pass",
              "Pass", "Pass", "Fail", "Fail", "Pass", "Pass", "Pass", "Pass")
Points <- as.character(c(7, 5, 3, 4, 0, 0, 0, 0, 17, 15, 13, 14, 0, 0, 0, 0))
Average <- as.character(c(7.5, 6.5, 7.1, 6.6, NA, NA, NA, NA,
                          17.5, 16.5, 17.1, 16.6, NA, NA, NA, NA))
Sigma <- as.character(c(2.5, 2.5, 2.1, 2.6, NA, NA, NA, NA,
                        12.5, 12.5, 12.1, 12.6, NA, NA, NA, NA))
Tool <- c("ABC_1", "ABC_2", "ABD_1", "ABD_2", "COB_1", "COB_2", "COB_1", "COB_2",
          "ABC_1", "ABC_2", "ABD_1", "ABD_2", "COB_1", "COB_2", "COB_1", "COB_2")
Dose <- as.character(c(NA, NA, NA, NA, 17.1, NA, NA, 17.3,
                       NA, NA, NA, NA, 117.1, NA, NA, 117.3))
Machine <- c("CO2", "CO6", "CO3", "CO6", "CO2,CO6", "CO2,CO3,CO4", "CO2,CO3", "CO2",
             "CO2", "CO6", "CO3", "CO6", "CO2,CO6", "CO2,CO3,CO4", "CO2,CO3", "CO2")
# stringsAsFactors = FALSE keeps every column as character on any R version;
# before R 4.0.0 data.frame() turned strings into factors by default.
df <- data.frame(ID, ToolID, Step, Measurement, Passfail, Points, Average,
                 Sigma, Tool, Dose, Machine, stringsAsFactors = FALSE)
Я пытаюсь проверить эти символьные векторы на предмет числовых значений, а затем преобразовать их с числовыми значениями в числовые. Для этого я использую пакет "varhandle" в R
library(varhandle)
# Convert the Machine column to numeric only when every value passes
# check.numeric(); NA values are ignored by the check (na.rm = TRUE).
if (all(check.numeric(df$Machine, na.rm=TRUE))){
# convert the vector to numeric
df$Machine <- as.numeric(df$Machine)
}
Это работает, но неэффективно, потому что мне приходится вручную вводить имена столбцов, как указано выше. Как я могу сделать это более эффективно в цикле или использовать векторизацию по нескольким столбцам? В моем фактическом наборе данных около 350 столбцов. Может ли кто-нибудь указать мне правильное направление?
@Imo Прекрасно работает. Спасибо
Просто используйте type.convert
: type.convert(df)
.
@nicola type.convert(df) ; # Error in type.convert(df) : the first argument must be of mode character
Я что-то упускаю?
df
— это объект из вопроса? У меня все работает нормально. У меня R 3.5.0.
@nicola Может быть, это проблема версии, у меня R версии 3.4.4.
Да, методы data.frame
и list
для type.convert
были представлены в R-3.5.0.
Мы можем использовать функцию parse_guess
из пакета readr
, которая в основном пытается угадать тип столбцов.
library(readr)
library(dplyr)
# Apply parse_guess() to every column so each one gets the type guessed from its values.
df1 <- df %>% mutate_all(parse_guess)
# Inspect the resulting column types.
str(df1)
#'data.frame': 16 obs. of 11 variables:
# $ ID : chr "A" "A" "A" "A" ...
# $ ToolID : chr "CCP_A" "CCP_A" "CCQ_A" "CCQ_A" ...
# $ Step : chr "Step_A" "Step_A" "Step_B" "Step_C" ...
# $ Measurement: chr "Length" "Breadth" "Width" "Height" ...
# $ Passfail : chr "Pass" "Pass" "Fail" "Fail" ...
# $ Points : int 7 5 3 4 0 0 0 0 17 15 ...
# $ Average : num 7.5 6.5 7.1 6.6 NA NA NA NA 17.5 16.5 ...
# $ Sigma : num 2.5 2.5 2.1 2.6 NA NA NA NA 12.5 12.5 ...
# $ Tool : chr "ABC_1" "ABC_2" "ABD_1" "ABD_2" ...
# $ Dose : num NA NA NA NA 17.1 NA NA 17.3 NA NA ...
# $ Machine : chr "CO2" "CO6" "CO3" "CO6" ...
С помощью varhandle и tidyverse:
# Convert to numeric each column for which all(check.numeric(column)) is TRUE.
df %>% mutate_if (purrr::compose(all,check.numeric),as.numeric)
Думаю, что самое простое решение - использовать all.is.numeric
из пакета Hmisc
. Вот простой пример:
# A vector that contains non-numeric strings is returned as it was passed in.
Hmisc::all.is.numeric(c("A", "B", "1"), what = "vector", extras = NA)
## [1] "A" "B" "1"
# When every non-NA value is numeric, the numeric vector is returned.
Hmisc::all.is.numeric(c("3", "2", "1", NA), what = "vector", extras = NA)
## [1] 3 2 1 NA
Затем вы можете использовать mutate_all из dplyr
, чтобы выполнить всю работу для data.frame:
library(dplyr)
# Example data: every column is created from strings (numeric values are
# passed through as.character()), so all columns start out as text.
ID <- c("A", "A", "A", "A", "A", "A", "A", "A",
        "B", "B", "B", "B", "B", "B", "B", "B")
ToolID <- c("CCP_A", "CCP_A", "CCQ_A", "CCQ_A", "IOT_B", "CCP_B", "CCQ_B", "IOT_B",
            "CCP_A", "CCP_A", "CCQ_A", "CCQ_A", "IOT_B", "CCP_B", "CCQ_B", "IOT_B")
Step <- c("Step_A", "Step_A", "Step_B", "Step_C", "Step_D", "Step_D", "Step_E", "Step_F",
          "Step_A", "Step_A", "Step_B", "Step_C", "Step_D", "Step_D", "Step_E", "Step_F")
Measurement <- c("Length", "Breadth", "Width", "Height", NA, NA, NA, NA,
                 "Length", "Breadth", "Width", "Height", NA, NA, NA, NA)
Passfail <- c("Pass", "Pass", "Fail", "Fail", "Pass", "Pass", "Pass", "Pass",
              "Pass", "Pass", "Fail", "Fail", "Pass", "Pass", "Pass", "Pass")
Points <- as.character(c(7, 5, 3, 4, 0, 0, 0, 0, 17, 15, 13, 14, 0, 0, 0, 0))
Average <- as.character(c(7.5, 6.5, 7.1, 6.6, NA, NA, NA, NA,
                          17.5, 16.5, 17.1, 16.6, NA, NA, NA, NA))
Sigma <- as.character(c(2.5, 2.5, 2.1, 2.6, NA, NA, NA, NA,
                        12.5, 12.5, 12.1, 12.6, NA, NA, NA, NA))
Tool <- c("ABC_1", "ABC_2", "ABD_1", "ABD_2", "COB_1", "COB_2", "COB_1", "COB_2",
          "ABC_1", "ABC_2", "ABD_1", "ABD_2", "COB_1", "COB_2", "COB_1", "COB_2")
Dose <- as.character(c(NA, NA, NA, NA, 17.1, NA, NA, 17.3,
                       NA, NA, NA, NA, 117.1, NA, NA, 117.3))
Machine <- c("CO2", "CO6", "CO3", "CO6", "CO2,CO6", "CO2,CO3,CO4", "CO2,CO3", "CO2",
             "CO2", "CO6", "CO3", "CO6", "CO2,CO6", "CO2,CO3,CO4", "CO2,CO3", "CO2")
# stringsAsFactors = FALSE keeps every column as character on any R version;
# before R 4.0.0 data.frame() turned strings into factors by default.
df <- data.frame(ID, ToolID, Step, Measurement, Passfail, Points, Average,
                 Sigma, Tool, Dose, Machine, stringsAsFactors = FALSE)
# Run all.is.numeric() on every column: with what = "vector" it returns the
# numeric version of a column when all of its values are numeric and the
# column as it was otherwise; extras = NA lets missing values count as numeric.
dt2 <- df %>% mutate_all(function(x) Hmisc::all.is.numeric(x, what = "vector", extras = NA))
## check classes
sapply(dt2, class)
## ID ToolID Step Measurement Passfail Points
## "character" "character" "character" "character" "character" "numeric"
## Average Sigma Tool Dose Machine
## "numeric" "numeric" "character" "numeric" "character"
Мы можем сделать это в base R
# Re-detect the type of every column from its text representation.
# as.character() lets factor columns be converted as well; as.is = TRUE keeps
# non-numeric columns as character instead of turning them into factors.
# Assigning into df[] replaces the columns while keeping the data frame.
df[] <- lapply(
  df,
  function(column) {
    type.convert(as.character(column), as.is = TRUE)
  }
)
str(df)
#'data.frame': 16 obs. of 11 variables:
# $ ID : chr "A" "A" "A" "A" ...
# $ ToolID : chr "CCP_A" "CCP_A" "CCQ_A" "CCQ_A" ...
# $ Step : chr "Step_A" "Step_A" "Step_B" "Step_C" ...
# $ Measurement: chr "Length" "Breadth" "Width" "Height" ...
# $ Passfail : chr "Pass" "Pass" "Fail" "Fail" ...
# $ Points : int 7 5 3 4 0 0 0 0 17 15 ...
# $ Average : num 7.5 6.5 7.1 6.6 NA NA NA NA 17.5 16.5 ...
# $ Sigma : num 2.5 2.5 2.1 2.6 NA NA NA NA 12.5 12.5 ...
# $ Tool : chr "ABC_1" "ABC_2" "ABD_1" "ABD_2" ...
# $ Dose : num NA NA NA NA 17.1 NA NA 17.3 NA NA ...
# $ Machine : chr "CO2" "CO6" "CO3" "CO6" ...
Другое решение — функция retype из пакета hablar:
library(hablar)
# retype() re-detects the type of every column; the result prints as a tibble.
df %>% retype()
который дает:
# A tibble: 16 x 11
ID ToolID Step Measurement Passfail Points Average Sigma Tool Dose Machine
<chr> <chr> <chr> <chr> <chr> <int> <dbl> <dbl> <chr> <dbl> <chr>
1 A CCP_A Step_A Length Pass 7 7.50 2.50 ABC_1 NA CO2
2 A CCP_A Step_A Breadth Pass 5 6.50 2.50 ABC_2 NA CO6
3 A CCQ_A Step_B Width Fail 3 7.10 2.10 ABD_1 NA CO3
4 A CCQ_A Step_C Height Fail 4 6.60 2.60 ABD_2 NA CO6
5 A IOT_B Step_D NA Pass 0 NA NA COB_1 17.1 CO2,CO6
6 A CCP_B Step_D NA Pass 0 NA NA COB_2 NA CO2,CO3,CO4
7 A CCQ_B Step_E NA Pass 0 NA NA COB_1 NA CO2,CO3
change <- sapply(dat, function(x) all(check.numeric(x, na.rm=TRUE)))
, а затем dat[change] <- lapply(dat[change], as.numeric)
.