Я хочу измерить однофакторный дисперсионный анализ с 4 уровнями: ctl
против schiz
, bp
, dep
. Я ожидаю, что aov.run
вернет числовой вектор с 3 переменными, так как я сравниваю ctl
с тремя другими уровнями. Однако я получаю только 2 переменные в aov.run
. Почему?
d <- read.table("filtered.mds", header=T)
ann <- read.table("clinical_table.txt", header=T, sep = "\t")
# Create new dataframe
dat <- t(cbind(d$C1,d$C2))
colnames(dat) <- paste(ann$Profile, data.table::rowid(ann$Profile), sep = "_")
rownames(dat) <- c("C1", "C2")
# Levels
ctl <- grepl("^Unaffected control", ann$Profile)
schiz <- grepl("^Schiz.", ann$Profile)
bp <- grepl("^BP", ann$Profile)
dep <- grepl("^Dep.", ann$Profile)
# 1-factor ANOVA with 4 levels
aov.lvl <- function(x,s1,s2,s3,s4) {
x1 <- as.numeric(x[s1])
x2 <- as.numeric(x[s2])
x3 <- as.numeric(x[s3])
x4 <- as.numeric(x[s4])
fac <- c(rep("A",length(x1)), rep("B",length(x2)), rep("C",length(x3)), rep("D",length(x4)))
a.dat <- data.frame(as.factor(fac),c(x1,x2,x3,x4))
names(a.dat) <- c("factor","express")
p.out <- summary(aov(express~factor, a.dat))[[1]][1,5]
return(p.out)
}
aov.run <- apply(dat, 1, aov.lvl, s1=ctl, s2=schiz, s3=bp, s4=dep)
фрейм данных d
> dput(d)
structure(list(FID = c("AC10", "AC11", "AC12", "AC13", "AC14",
"AC15", "AC17", "AC18", "AC19", "AC1", "AC20", "AC21", "AC22",
"AC23", "AC24", "AC25", "AC26", "AC27", "AC29", "AC2", "AC30",
"AC31", "AC32", "AC33", "AC34", "AC35", "AC36", "AC37", "AC38",
"AC39", "AC3", "AC40", "AC41", "AC42", "AC43", "AC45", "AC46",
"AC47", "AC48", "AC49", "AC50", "AC51", "AC52", "AC53", "AC54",
"AC55", "AC56", "AC57", "AC58", "AC5", "AC60", "AC61", "AC62",
"AC63", "AC64", "AC65", "AC66", "AC67", "AC69", "AC6", "AC70",
"AC71", "AC72", "AC73", "AC74", "AC75", "AC76", "AC77", "AC78",
"AC79", "AC7", "AC80", "AC81", "AC82", "AC83", "AC84", "AC86",
"AC87", "AC88", "AC89", "AC8", "AC90", "AC91", "AC92", "AC9",
"AC100", "AC101", "AC102", "AC103", "AC104", "AC105", "AC16",
"AC68", "AC93", "AC94", "AC95", "AC96", "AC97", "AC99", "DE10",
"DE12", "DE13", "DE14", "DE15", "DE16", "DE17", "DE18", "DE19",
"DE1", "DE20", "DE21", "DE22", "DE23", "DE25", "DE26", "DE27",
"DE2", "DE33", "DE34", "DE35", "DE36", "DE37", "DE38", "DE39",
"DE3", "DE40", "DE41", "DE42", "DE44", "DE45", "DE46", "DE47",
"DE48", "DE49", "DE4", "DE50", "DE51", "DE52", "DE53", "DE54",
"DE55", "DE56", "DE57", "DE58", "DE59", "DE60", "DE7", "DE9",
"DE29", "DE30", "DE32", "DE43", "DE5"), IID = c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), SOL = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), C1 = c(-0.00385609, 0.0101138, -0.0146168, -0.0218236, -0.0134745,
-0.017089, 0.0152448, 0.0134359, 0.00540102, -0.0125389, 0.00463956,
-0.00416079, -0.000325898, 0.0132781, 0.0130666, 0.00718399,
-0.0051912, -0.0227934, 0.0364974, -0.0180301, -0.0226556, -0.00585266,
0.0258924, -0.00994298, -0.00380612, 0.0187883, 0.0103367, 0.00747272,
0.0191431, -0.00501846, -0.00118336, 0.0361201, 0.00830498, 0.00380194,
0.00667686, -0.000441697, -0.00170991, -0.0281008, -0.00424591,
0.0213412, 0.00261405, 0.016154, 0.0098956, 0.0141544, 0.0367203,
0.0144693, 0.0256731, -0.00218851, 0.0204603, -0.000603019, -0.00504176,
-0.00917368, 0.00237875, 0.0175946, 0.0188388, 0.0368965, -0.00408476,
0.00871812, -0.00851917, 0.0252035, -0.00915532, 0.0223745, 0.016866,
0.026825, 0.0366276, 0.0540474, 0.0386237, 0.0029996, 0.0207176,
0.0177353, -0.0066377, 0.0343811, 0.0282509, 0.00526683, 0.0459516,
0.00976286, 0.0259005, -0.00104822, -0.012696, 0.0134071, 0.0231658,
0.00359455, 0.0194968, -0.000936478, -0.0029218, -0.0058512,
-0.000837274, -0.0129465, -0.0102079, -0.00559039, 0.0118966,
0.00147658, 0.0120396, -0.0104779, -0.0315149, -0.0115454, -0.0122457,
-6.72242e-05, 0.00370599, -0.0164126, -0.0107853, -0.0271741,
-0.0212005, -0.0445118, -0.0387773, -0.025109, -0.0321735, -0.0398603,
-0.0266408, -0.0260984, -0.0296337, -0.0185381, -0.0403944, 0.0197937,
-0.0176322, -0.013238, -0.0071666, -7.27277e-05, 0.00397489,
0.0335056, -0.00604706, -0.00926438, 0.00706601, -0.0156982,
-0.0275085, -0.00864179, -0.0247967, -0.030564, -0.00767327,
-0.0235161, 0.00649758, -0.0329062, -0.0016138, -0.00701695,
0.00819454, 0.0100377, 0.0250199, -0.0493141, -0.0216641, -0.0244709,
-0.00466616, 0.016751, -0.0191688, -0.00492488, -0.0162364, -0.0167085,
-0.0113427, 0.000422333, 0.030274, 0.0317995, 0.00237194, -0.00693838,
-0.0100835), C2 = c(0.000865365, -0.001752, 0.0189917, -0.023343,
-0.0340531, -0.0258976, -0.00794043, 0.0173163, 0.00639341, -0.0343077,
0.01083, -0.0402179, 0.0158751, -0.00262893, -0.0216757, -0.00261259,
-0.00542089, -0.00515714, 0.0105216, -0.0193606, 0.00692795,
-0.0117295, -0.0235627, -0.00850041, -0.0156109, -0.00871875,
-0.0163218, 0.0227143, -0.0161961, -0.0176719, -0.0070994, 0.0262932,
0.00164033, -0.00969917, -0.0197631, -0.0154387, -0.0194608,
0.00442207, -0.0234804, 0.00822342, -0.00657274, -0.0092332,
0.0130892, -0.0345162, -0.0114187, -0.0129497, -0.00306092, 0.0417858,
0.0262002, -0.0188849, -0.0184154, -0.0109956, -0.0151195, -0.00414531,
0.010064, 0.0308816, -0.0153337, 0.0157867, -0.0289866, -0.0106713,
0.000112714, -0.00152177, 0.0184509, 0.0112357, 0.00097954, 0.032083,
0.0190258, -0.0371498, -0.0307498, -0.00947645, -0.00198995,
0.015845, -0.0240248, -0.0122369, -0.00107049, -0.0144661, 0.0207883,
-0.0418619, -0.0123712, -0.0212721, -0.00667244, -0.028512, -0.00522357,
-0.018842, -0.0123026, -0.00511655, 0.0188473, 0.00739189, 0.0321578,
-0.015449, 0.0214631, -0.00995001, -0.00144645, 0.00934907, 0.0344757,
-0.0220224, 0.0121403, -0.00615057, -0.0208969, 0.0313899, -0.0251011,
0.011635, 0.00536455, 0.0233033, -0.0019204, 0.0273593, 0.00844028,
0.00181444, 0.02824, 0.0255231, 0.00266055, -0.00850383, -0.0129938,
0.0268634, 0.0195986, 0.0320615, -0.0026514, 0.0127147, 0.014279,
0.0553434, -0.020963, 0.00629119, -0.0244099, -0.0080923, 0.0173508,
0.0485753, -0.00666049, 0.0501603, 0.0029162, 0.0267363, 0.0066606,
0.00857736, 0.0172693, -0.00827586, -0.0117478, -0.00336638,
0.00954265, -0.00889617, 0.00290055, 0.0229832, 0.0504569, 0.025979,
-0.00795356, -0.0135421, -0.00359528, 0.0150037, -0.0105817,
0.0167827, 0.0110882, 0.00200862, -0.00597284, -0.0188371, -0.00827599
)), class = "data.frame", row.names = c(NA, -153L))
колонкаann$Profile
> dput(ann$Profile)
c("Schiz.", "Schiz.", "Schiz.", "BP", "BP", "Unaffected control",
"Schiz.", "BP", "Unaffected control", "BP", "BP", "BP", "Schiz.",
"BP", "Unaffected control", "Schiz.", "Schiz.", "Unaffected control",
"Unaffected control", "BP", "Unaffected control", "Schiz.", "BP",
"Unaffected control", "BP", "Unaffected control", "BP", "Schiz.",
"Unaffected control", "Schiz.", "Schiz.", "Schiz.", "Schiz.",
"BP", "Unaffected control", "Schiz.", "BP", "Schiz.", "BP", "Unaffected control",
"BP", "Unaffected control", "Unaffected control", "Unaffected control",
"Unaffected control", "Schiz.", "Unaffected control", "BP", "BP",
"BP", "Unaffected control", "BP", "BP", "BP", "BP", "Unaffected control",
"Schiz.", "Unaffected control", "BP", "BP", "Unaffected control",
"Unaffected control", "BP", "Schiz.", "BP", "Schiz.", "BP", "Unaffected control",
"Schiz.", "Unaffected control", "Schiz.", "Unaffected control",
"Schiz.", "Schiz.", "Unaffected control", "Unaffected control",
"Unaffected control", "Schiz.", "Schiz.", "BP", "BP", "Unaffected control",
"Unaffected control", "Schiz.", "Schiz.", "Schiz.", "Schiz.",
"BP", "Unaffected control", "BP", "Unaffected control", "BP",
"Schiz.", "Schiz.", "Schiz.", "Unaffected control", "Unaffected control",
"Schiz.", "Unaffected control", "Unaffected control", "Unaffected control",
"BP", "BP", "Dep.", "Unaffected control", "Unaffected control",
"Dep.", "Dep.", "Dep.", "Dep.", "Dep.", "Unaffected control",
"Unaffected control", "Schiz.", "Dep.", "BP", "Dep.", "Schiz.",
"Schiz.", "Schiz.", "BP", "Unaffected control", "Unaffected control",
"Unaffected control", "BP", "BP", "Dep.", "Schiz.", "Dep.", "BP",
"Unaffected control", "Unaffected control", "Schiz.", "Schiz.",
"Unaffected control", "Unaffected control", "BP", "BP", "Schiz.",
"Dep.", "BP", "Dep.", "BP", "Schiz.", "Unaffected control", "Dep.",
"BP", "Schiz.", "Dep.", "Dep.", "BP", "BP", "Schiz.")
aov.run
возвращает p-значение, связанное с выполнением ANOVA для каждого из ваших столбцов отдельно. Для каждого ANOVA существует только одно значение p, поэтому вы получаете одно для C1 и одно для C2, всего два. Это работает, как и ожидалось!
Похоже, вас интересуют апостериорные результаты дисперсионного анализа, полученные с помощью теста Тьюки или чего-то подобного. HSD Тьюки даст вам p-значение каждого сравнения отдельно для 6 сравнений на ANOVA («Отклонение-BP», «Schiz.-BP», «Незатронутый контроль-BP», «Schiz.-Dep.», «Незатронутый control-Dep.» и «Незатронутый контроль-Schiz.»), и похоже, что вас интересуют только «Незатронутый контроль-BP», «Незатронутый контроль-Dep.» и «Незатронутый контроль-Schiz». сравнения. Вы можете получить их, заменив вызовы summary()
на tukeyHSD()
:
v <- cbind(d, Profile=ann$Profile)
# p-value for each ANOVA
summary(aov(C1~Profile, data = v))[[1]][1,5]
summary(aov(C2~Profile, data = v))[[1]][1,5]
# p-value for each comparison within the ANOVA
TukeyHSD(aov(C1~Profile, data = v))
TukeyHSD(aov(C2~Profile, data = v))