2017-09-15 1 views
2

J'ai quelques données que je voudrais mettre en forme correctement avec quelques valeurs récapitulatives dans R. J'ai essayé aggregate et d'autres fonctions comme summaryBy, mais aucune n'a produit ce que je voulais. Résumer les données dans un tableau par groupe pour chaque variable dans R

Voici les données :

# Example data set: 48 observations of four measurements (x1..x4), each
# belonging to one of four treatments. Rows are ordered by treatment:
# rows 1-14 are "A", 15-29 are "B", 30-42 are "C" and 43-48 are "D".
# The numeric vectors below are laid out one treatment per group of lines.
data <- data.frame(
  id = as.numeric(1:48),
  x1 = c(
    # A
    0.2846, 0.3741, 0.4208, 0.3756, 0.3476, 0.3664, 0.2852,
    0.3537, 0.3116, 0.3124, 0.364, 0.3934, 0.3456, 0.3034,
    # B
    0.3139, 0.2766, 0.3034, 0.3159, 0.3648, 0.4046, 0.3961, 0.3451,
    0.2059, 0.3184, 0.2481, 0.3503, 0.331, 0.3166, 0.3203,
    # C
    0.1868, 0.245, 0.1625, 0.2227, 0.196, 0.1697, 0.2064,
    0.1369, 0.1938, 0.1498, 0.1315, 0.1523, 0.2151,
    # D
    0.168, 0.1427, 0.3083, 0.301, 0.2328, 0.2747
  ),
  x2 = c(
    # A
    -0.4364, -0.5262, -0.5338, -0.5037, -0.4758, -0.5003, -0.4359,
    -0.5002, -0.4027, -0.424, -0.4811, -0.5492, -0.3846, -0.3899,
    # B
    -0.4473, -0.3688, -0.3946, -0.4112, -0.4833, -0.4909, -0.4865, -0.368,
    0.295, -0.3221, -0.2482, -0.5424, -0.5021, -0.4453, -0.3952,
    # C
    0.3915, 0.4472, 0.364, 0.436, 0.3877, 0.4077, 0.2737,
    0.3104, 0.3514, 0.3256, 0.287, 0.3126, 0.3648,
    # D
    -0.2596, -0.1913, -0.3656, -0.4598, -0.3198, -0.3685
  ),
  x3 = c(
    # A
    0.6043, 0.5141, 0.4638, 0.486, 0.3691, 0.4104, 0.426,
    0.3846, 0.3191, 0.4347, 0.5842, 0.4638, 0.4418, 0.523,
    # B
    0.5009, 0.4568, 0.5105, 0.5421, 0.4857, 0.4063, 0.391, 0.4114,
    0.5189, 0.5248, 0.4942, 0.2855, 0.6107, 0.4712, 0.2009,
    # C
    0.4632, 0.4457, 0.3914, 0.4547, 0.4801, 0.4873, 0.5501,
    0.4442, 0.4458, 0.4651, 0.5748, 0.5231, 0.4869,
    # D
    0.1769, 0.099, 0.5013, 0.4543, 0.4601, 0.4396
  ),
  x4 = c(
    # A
    0.4895, 0.6991, 0.6566, 0.6106, 0.6976, 0.6883, 0.6533,
    0.6951, 0.6852, 0.5062, 0.5682, 0.6172, 0.5073, 0.6514,
    # B
    0.577, 0.5228, 0.6571, 0.6132, 0.4893, 0.7904, 0.6519, 0.6582,
    0.6919, 0.6011, 0.6145, 0.5943, 0.4608, 0.5997, 0.4431,
    # C
    0.4082, 0.5641, 0.4535, 0.5448, 0.4632, 0.4237, 0.6187,
    0.4115, 0.4995, 0.4504, 0.4103, 0.4511, 0.527,
    # D
    0.3654, 0.2537, 0.6317, 0.478, 0.5915, 0.5283
  ),
  trt = rep(c("A", "B", "C", "D"), times = c(14, 15, 13, 6))
)

et je voudrais résumer les données de la façon suivante.

     |            A            |            B            |            C            |            D            |
-----+------------+------------+------------+------------+------------+------------+------------+------------+
     | Mean       | Std.Dev    | Mean       | Std.Dev    | Mean       | Std.Dev    | Mean       | Std.Dev    |
-----+------------+------------+------------+------------+------------+------------+------------+------------+
| X1 | 0.3456     | 0.04104    | 0.3207333  | 0.0514311  | 0.1821923  | 0.0350107  | 0.2379167  | 0.06966645 |
-----+------------+------------+------------+------------+------------+------------+------------+------------+
| X2 | -0.4674143 | 0.05489628 | -0.37406   | 0.2003379  | 0.3584308  | 0.05489583 | -0.3274333 | 0.0936547  |
-----+------------+------------+------------+------------+------------+------------+------------+------------+
| X3 | 0.4589214  | 0.07952784 | 0.45406    | 0.1036369  | 0.4778769  | 0.04866813 | 0.3552     | 0.1713025  |
-----+------------+------------+------------+------------+------------+------------+------------+------------+
| X4 | 0.6232571  | 0.0762495  | 0.5976867  | 0.0914621  | 0.4789231  | 0.06686731 | 0.4747667  | 0.1428023  |
-----+------------+------------+------------+------------+------------+------------+------------+------------+

L'une des façons dont j'ai essayé de faire cette agrégation est la suivante :

# Asker's attempt: per-treatment mean and standard deviation of the four
# measurement columns (columns 2 to 5 of `data`, i.e. x1..x4).
library(dplyr) 
# The grouping key is the external vector `data$trt` (trt is not among the
# selected columns), which is why the grouping column is labelled "data$trt"
# in the result. t() then transposes the summary into a character matrix with
# one column per treatment and one row per <variable>_<statistic>.
t(data[,2:5] %>% group_by(data$trt) %>% summarise_each(funs(mean, sd))) 

mais cela produit un résultat dans ce format :

  [,1]   [,2]   [,3]   [,4]   
data$trt "A"   "B"   "C"   "D"   
x1_mean "0.3456000" "0.3207333" "0.1821923" "0.2379167" 
x2_mean "-0.4674143" "-0.3740600" " 0.3584308" "-0.3274333" 
x3_mean "0.4589214" "0.4540600" "0.4778769" "0.3552000" 
x4_mean "0.6232571" "0.5976867" "0.4789231" "0.4747667" 
x1_sd "0.04104517" "0.05143110" "0.03501070" "0.06966645" 
x2_sd "0.05489628" "0.20033792" "0.05489583" "0.09365470" 
x3_sd "0.07952784" "0.10363689" "0.04866813" "0.17130249" 
x4_sd "0.07624950" "0.09146218" "0.06686731" "0.14280235" 

Est-il possible de faire ce que je veux dans R?

Répondre

2

Voici une façon de le faire :

# Build a wide summary table: one row per measurement variable (x1..x4) and
# one column per treatment/statistic pair (A_Mean, A_Std.Dev, B_Mean, ...).
#
# Requires `data` and an attached dplyr (for `%>%`). Every verb is
# namespace-qualified so the pipeline does not depend on tidyr being attached
# and does not break when another attached package masks select(), which
# produces "Error in select(., -id) : unused argument (-id)".
data %>% 
    dplyr::select(-id) %>%                 # drop the row identifier
    tidyr::gather(row, val, -trt) %>%      # long format: one row per (trt, variable)
    dplyr::group_by(trt, row) %>% 
    # Plain summarise() instead of summarise_all(funs(...)): funs() is
    # deprecated, and the resulting columns (Mean, Std.Dev) are the same.
    dplyr::summarise(Mean = mean(val), `Std.Dev` = sd(val)) %>% 
    tidyr::gather(col, val, Mean, `Std.Dev`) %>%   # stack the two statistics
    tidyr::unite("col", trt, col) %>%      # combined key, e.g. "A_Mean"
    tidyr::spread(col, val)                # one column per treatment/statistic
# # A tibble: 4 x 9 
# row A_Mean A_Std.Dev B_Mean B_Std.Dev C_Mean C_Std.Dev D_Mean D_Std.Dev 
# * <chr> <dbl>  <dbl> <dbl>  <dbl> <dbl>  <dbl> <dbl>  <dbl> 
# 1 x1  0.346 0.0410 0.321 0.0514 0.182 0.0350 0.238 0.0697 
# 2 x2 -0.467 0.0549 -0.374 0.200 0.358 0.0549 -0.327 0.0937 
# 3 x3  0.459 0.0795 0.454 0.104 0.478 0.0487 0.355 0.171 
# 4 x4  0.623 0.0762 0.598 0.0915 0.479 0.0669 0.475 0.143 

Vous pourriez ajouter %>% tibble::column_to_rownames("row") pour transformer la première colonne en noms de lignes ; cependant, cela est déprécié.

+0

Je reçois l'erreur suivante : « Error in select(., -id) : unused argument (-id) » (argument inutilisé). Une idée de la raison ? Je n'ai rien changé à votre code. – Kuni

+0

@Kuni Voir https://www.google.com/search?q=dplyr+"Error+in+select"+"unused+argument ". Il s'agit probablement d'un conflit avec un autre paquet (voir `conflicts()`). Essayez `dplyr::select` au lieu de `select`. – lukeA

+0

J'ai effacé l'historique (`history`) et l'environnement (`environment`), mais l'erreur est quand même apparue. Je ne sais pas pourquoi. Cependant, j'ai retiré l'appel à `select` et fait la suppression directement dans `data`. Ça a marché. C'est ce que je cherchais. Ce n'est peut-être pas exactement ce dont j'ai besoin, mais tant que j'ai ce résultat, je peux toujours le transformer en ce que je veux dans un logiciel de traitement de texte. Merci @lukeA. Cela m'a été d'une aide précieuse. – Kuni

1

Voici une façon de le faire en utilisant R de base et aggregate :

# For each measurement column (columns 2 to 5 of `data`), compute summary()
# within every treatment group; returns a list with one aggregate table per
# column, named after the columns.
lapply(
  data[, 2:5],
  function(column) aggregate(column, by = list(data$trt), FUN = summary)
)
$x1 
    Group.1 x.Min. x.1st Qu. x.Median x.Mean x.3rd Qu. x.Max. 
1  A 0.2846 0.3118 0.3506 0.3456 0.3722 0.4208 
2  B 0.2059 0.3086 0.3184 0.3207 0.3477 0.4046 
3  C 0.1315 0.1523 0.1868 0.1822 0.2064 0.2450 
4  D 0.1427 0.1842 0.2538 0.2379 0.2944 0.3083 

$x2 
    Group.1 x.Min. x.1st Qu. x.Median x.Mean x.3rd Qu. x.Max. 
1  A -0.5492 -0.5028 -0.4784 -0.4674 -0.4270 -0.3846 
2  B -0.5424 -0.4849 -0.4112 -0.3741 -0.3684 0.2950 
3  C 0.2737 0.3126 0.3640 0.3584 0.3915 0.4472 
4  D -0.4598 -0.3678 -0.3427 -0.3274 -0.2746 -0.1913 

$x3 
    Group.1 x.Min. x.1st Qu. x.Median x.Mean x.3rd Qu. x.Max. 
1  A 0.3191 0.4143 0.4528 0.4589 0.5071 0.6043 
2  B 0.2009 0.4088 0.4857 0.4541 0.5147 0.6107 
3  C 0.3914 0.4458 0.4651 0.4779 0.4873 0.5748 
4  D 0.0990 0.2426 0.4470 0.3552 0.4586 0.5013 

$x4 
    Group.1 x.Min. x.1st Qu. x.Median x.Mean x.3rd Qu. x.Max. 
1  A 0.4895 0.5788 0.6524 0.6233 0.6875 0.6991 
2  B 0.4431 0.5499 0.6011 0.5977 0.6545 0.7904 
3  C 0.4082 0.4237 0.4535 0.4789 0.5270 0.6187 
4  D 0.2537 0.3936 0.5032 0.4748 0.5757 0.6317