2011-05-14 6 views
0

Je m'excuse, je ne sais pas vraiment comment utiliser HTML ou autre chose pour obtenir une « jolie » présentation, en particulier pour rendre mes exemples de données utilisables pour vous tous. J'apprends au fur et à mesure. J'essaie d'exécuter une analyse de clusters sur les variables PersVel, TurnVel et Velocity (et peut-être d'autres, mais cela suffira pour l'instant). Mes données sont déjà séparées par année, mais j'ai un nombre variable d'individus par an (ID est leur identifiant). Je veux lancer l'analyse k-means et/ou le clustering hiérarchique sur ces variables PAR individu. Les données ci-dessous ne représentent que 20 points de données. Une fois les groupes d'intérêt identifiés, je veux les relier à la date du calendrier ou à la variable date/heure. En fin de compte, je veux savoir quand les clusters se sont produits. J'ai déjà du code pour transformer l'ID en niveaux, et on m'a dit que je devais standardiser les variables pour le clustering k-means (je suppose donc qu'il faut faire de même pour le clustering hiérarchique, mais ce n'est pas un gros problème). Simplement, comment appliquer cela à chaque individu ?

Titre : analyse de cluster en R avec plusieurs individus

IDNames = levels(Data$ID) 
for (i in 1:(length(IDNames)){ 

Et maintenant ? Comment puis-je écrire la partie suivante pour faire ce test ?

structure(list(ID = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("c_002", 
"c_102", "c_104", "c_401", "c_402", "c_406", "c_409", "c_411", 
"c_412", "c_413", "c_414", "c_415", "c_417", "c_418", "c_420", 
"c_421", "c_423", "c_425", "c_426", "c_602", "c_604", "c_9809", 
"c_9814", "c_9815", "c_9816", "c_9819", "c_9908", "c_9911"), class = "factor"), 
    x = c(229539.8109, 231122.438, 231290.6472, 231355.2828, 
    230910.8116, 230928.7384, 231164.6592, 231113.9708, 231186.0565, 
    231270.4396, 231334.5768, 231153.0715, 231215.2728, 231200.7462, 
    231325.1136, 231777.6369, 231522.6185, 231674.6925, 231684.3388, 
    231924.464, 232065.5961), y = c(2229114.92, 2229455.232, 
    2230388.77, 2232003.32, 2232559.623, 2232521.689, 2232434.829, 
    2232996.109, 2233038.608, 2233160.861, 2233371.836, 2233471.823, 
    2233307.792, 2233285.778, 2233204.662, 2231630.353, 2231054.838, 
    2231056.299, 2230981.267, 2230840.082, 2230998.991), DateTime = structure(c(1148853637, 
    1148871660, 1148889637, 1148907637, 1148925637, 1148943666, 
    1148961637, 1148979636, 1148997636, 1149015637, 1149033637, 
    1149051690, 1149069666, 1149087665, 1149105637, 1149123683, 
    1149141654, 1149159637, 1149177636, 1149195696, 1149213696 
    ), class = c("POSIXct", "POSIXt"), tzone = "UTC"), RunClock_days = c(1179.58332175926, 
    1179.79192129630, 1179.99998842593, 1180.20832175926, 1180.41665509259, 
    1180.62532407407, 1180.83332175926, 1181.04164351852, 1181.24997685185, 
    1181.45832175926, 1181.66665509259, 1181.87560185185, 1182.08365740741, 
    1182.29197916667, 1182.49998842593, 1182.70885416667, 1182.91685185185, 
    1183.12498842593, 1183.33331018519, 1183.54233796296, 1183.75067129630 
    ), CalDay = c("148", "149", "149", "149", "149", "149", "150", 
    "150", "150", "150", "151", "151", "151", "151", "151", "152", 
    "152", "152", "152", "152", "153"), DistX = c(1582.62709999998, 
    168.209200000012, 64.6355999999796, -444.4712, 17.9268000000156, 
    235.920799999993, -50.6883999999845, 72.085699999996, 84.3831000000064, 
    64.1371999999974, -181.505300000019, 62.2013000000152, -14.5266000000120, 
    124.367400000017, 452.523300000001, -255.018400000001, 152.073999999993, 
    9.64629999999306, 240.125200000009, 141.132099999988, -3159.38569999998 
    ), DistY = c(340.311999999918, 933.538000000175, 1614.54999999981, 
    556.303000000305, -37.9340000003576, -86.8599999998696, 561.280000000261, 
    42.4989999998361, 122.253000000026, 210.975000000093, 99.9869999997318, 
    -164.030999999959, -22.0139999999665, -81.1159999999218, 
    -1574.30899999989, -575.51500000013, 1.46100000012666, -75.032000000123, 
    -141.185000000056, 158.908999999985, -5943.84400000004), 
    Dist = c(1618.80227174238, 948.571311188026, 1615.84326693116, 
    712.058758417295, 41.9566265835052, 251.402632191101, 563.564151002218, 
    83.6810202224823, 148.547310896621, 220.508573640299, 207.223488285096, 
    175.428534402698, 26.3749559916007, 148.482509538166, 1638.05560483262, 
    629.48542442515, 152.081017872048, 75.649534880978, 278.555768025113, 
    212.533150194039, 6731.34455348268), LnDist = c(7.38944181635036, 
    6.8549569696676, 7.3876122460922, 6.56816043387389, 3.73663638428818, 
    5.527055766233, 6.33428117083723, 4.42701219219356, 5.00090349939957, 
    5.39593657685343, 5.33379786440982, 5.16723174859221, 3.27241492322993, 
    5.00046717041827, 7.4012652106211, 6.44490269900689, 5.02441339116178, 
    4.32611129191379, 5.62961828357648, 5.35909797711072, 8.81453018774869 
    ), TimeDif = c(5.00638888888889, 4.99361111111111, 5, 5, 
    5.00805555555556, 4.99194444444444, 4.99972222222222, 5, 
    5.00027777777778, 5, 5.01472222222222, 4.99333333333333, 
    4.99972222222222, 4.99222222222222, 5.01277777777778, 4.99194444444444, 
    4.99527777777778, 4.99972222222222, 5.01666666666667, 5, 
    4.98361111111111), Velocity = c(323.347288368894, 189.956985051838, 
    323.168653386232, 142.411751683459, 8.3778277053979, 50.3616646757533, 
    112.719092372242, 16.7362040444965, 29.7078117453384, 44.1017147280597, 
    41.3230243076688, 35.1325502809141, 5.27528426966845, 29.7427684363120, 
    326.776026676129, 126.100246393108, 30.4449571450467, 15.130747573283, 
    55.5260667159693, 42.5066300388078, 1350.69619266137), LnVelocity = c(5.77872694180175, 
    5.24679765206538, 5.7781743336581, 4.95872252143979, 2.12558865719019, 
    3.91923026414518, 4.72489881550196, 2.81757427975946, 3.39141003295307, 
    3.78649866441933, 3.72141983391736, 3.55912805917125, 1.66303256789466, 
    3.39258602467242, 5.78927500251085, 4.83707719691907, 3.41592036944078, 
    2.71672893657851, 4.0168525810497, 3.74966006467662, 7.2083754367735 
    ), Heading = c(1.35899167682096, 0.178271769107279, 0.040011832151945, 
    5.60907076311214, 2.70012174242416, 1.92356952639201, 6.193121040462, 
    1.03808707214764, 0.604141059039809, 0.295125938335282, 5.21590486031959, 
    2.77914091577713, 3.72488212039469, 2.14873677066758, 2.86169595063768, 
    3.55870493136089, 1.56118945741765, 3.01373153808326, 2.10231890072709, 
    0.726219128764754, 3.63015207232184), Angle = c(0.609592148368293, 
    -1.18071990771368, -0.138259936955334, 5.5690589309602, -2.90894902068798, 
    -0.776552216032153, 4.26955151407000, -5.15503396831437, 
    -0.433946013107828, -0.309015120704527, 4.92077892198431, 
    -2.43676394454246, 0.945741204617556, -1.57614534972711, 
    0.712959179970102, 0.697008980723212, -1.99751547394325, 
    1.45254208066561, -0.911412637356172, -1.37609977196233, 
    2.90393294355708), CosAngle = c(0.81988159459602, 0.380259094713527, 
    0.990457310809811, 0.755665715954353, -0.973060304449898, 
    0.713334063328187, -0.428504949324728, 0.428331029577178, 
    0.907313699540722, 0.952633553896418, 0.206884943442359, 
    -0.761722542920473, 0.585141974104434, -0.00534899742449928, 
    0.756429664977827, 0.766765630720815, -0.413886381311673, 
    0.117978826229562, 0.612629855005907, 0.193468831871728, 
    -0.971891607429047), SinAngle = c(0.572533117682013, -0.924880003507292, 
    -0.137819865996876, -0.654957499179294, -0.230550740410589, 
    -0.700824167745161, -0.90353943378483, 0.903621894987807, 
    -0.420454338336196, -0.304120555028232, -0.978365279523375, 
    -0.647903362861135, 0.810930866437557, -0.999985694010946, 
    0.654075043050515, 0.641927151275992, -0.910328546934967, 
    0.993016110927459, -0.7903699518298, -0.981106421900391, 
    0.235428765041538), PersVel = c(265.106490396188, 72.2328711703229, 
    320.084755370955, 107.615678296195, -8.15213157764327, 35.9246908991267, 
    -48.300688964897, 7.1686355095929, 26.9543045799223, 42.0127732343175, 
    8.54911154675927, -26.7612555392593, 3.08679025151586, -0.159093991763311, 
    247.183080381409, 96.6893349596614, -12.6007531419523, 1.78510783867173, 
    34.0169262012526, 8.22370806041186, -1312.73029383395), TurnVel = c(185.127031103868, 
    -175.687417000979, -44.5390605040812, -93.273644736341, -1.93151438051183, 
    -35.2946717326457, -101.846144898756, 15.1232004135905, -12.4907783308025, 
    -13.4122379607943, -40.4290122275236, -22.7624974728921, 
    4.27789084350665, -29.7423429365923, 213.736043716065, 80.9471719423284, 
    -27.7149135993477, 15.0250761106466, -43.886134675599, -41.7035277044184, 
    317.992736584573)), .Names = c("ID", "x", "y", "DateTime", 
"RunClock_days", "CalDay", "DistX", "DistY", "Dist", "LnDist", 
"TimeDif", "Velocity", "LnVelocity", "Heading", "Angle", "CosAngle", 
"SinAngle", "PersVel", "TurnVel"), row.names = 150:170, class = "data.frame") 

Répondre

1

Pour faire simple (et en supposant que votre data.frame d'origine s'appelle orgData) :

# Run the same analysis separately for every individual (one level of ID).
# Assumes the original data.frame is called orgData and that orgData$ID is a
# factor, so that levels() lists every individual.
IDNames <- levels(orgData$ID)
# Preallocate one slot per individual and name it after the ID, so a result
# can be looked up either by position or by ID.
results <- vector("list", length(IDNames))
names(results) <- IDNames
# seq_along() is safe even when there are no levels (1:length() would
# iterate over c(1, 0) in that case).
for (i in seq_along(IDNames)) {
    dataForCurrentIndividual <- orgData[orgData$ID == IDNames[i], ]
    #now do whatever analysis you're interested in on data.frame dataFor...

    #after your analysis, I assume the result is in a variable resCurIndv
    results[[i]] <- resCurIndv #keep your results in the i'th spot in the results list
}

Une fois cela fait, une bonne étape suivante est probablement de rendre votre code plus « R-ish » (c'est-à-dire plus idiomatique en R).

D'abord, transformez ce qui précède en fonction. C'est-à-dire : prenez tout le code que vous avez écrit à l'endroit où se trouve mon commentaire (celui qui commence par #), et transformez-le en une fonction comme ceci :

#' Analyse the observations belonging to a single individual.
#'
#' Template: fill in the body with the per-individual analysis.
#'
#' @param dataForCurrentIndividual data.frame containing the rows of one
#'   individual (one ID) only.
#' @return The value of `resCurIndv`, i.e. whatever the analysis stored there.
analysisPerIndividual <- function(dataForCurrentIndividual) {
    # Placeholder: run the analysis of interest on dataForCurrentIndividual
    # here and leave its outcome in a variable named resCurIndv.

    # The last expression is the function's return value.
    resCurIndv
}

Maintenant, vous pouvez l'utiliser comme ceci (notez que vous devez installer le paquet plyr pour cela) :

# plyr must be installed. library() stops with an error if it is missing,
# whereas require() only returns FALSE and lets dlply() fail later.
library(plyr)
# Split orgData by ID, apply analysisPerIndividual to each piece, and keep
# the returned list (one element per ID) instead of discarding it.
results <- dlply(orgData, "ID", analysisPerIndividual)

Pour standardiser vos variables, voir ?scale ; pour k-means, voir ?kmeans. Bonne chance !

Questions connexes