2017-09-09 2 views
0

J'ai récupéré le contenu d'un site Web au format XML et je l'ai déjà converti en liste. Maintenant, j'ai des difficultés à extraire des données de cette liste imbriquée, car sa structure est très compliquée.

Extraire des données à partir d'une liste imbriquée avec des boucles

Voici une partie de ma structure z2:

dput(z2) 
structure(list(scheduleList = structure(list(
schedule = structure(list(
score = structure(list(
    class = structure(list(name = list("011c"), people = list("2"), teacher = structure(list(name = list("A")), .Names = "name", id = "D29")), .Names = c("name", "people", "teacher"), id = "011c", status = "-2"), 
    class = structure(list(name = list("013"), people = list("0"), teacher = structure(list(name = list("B")), .Names = "name", id = "D14")), .Names = c("name", "people", "teacher"), id = "602d", status = "-4"), 
    class = structure(list(name = list("603"), people = list("6"), teacher = structure(list(name = list("C")), .Names = "name", id = "D31")), .Names = c("name", "people", "teacher"), id = "603", status = "-4")), 
.Names = c("class", "class", "class"), id = "1"), 
score = structure(list(
    class = structure(list(name = list("011c"), people = list("4"), teacher = structure(list(name = list("A")), .Names = "name", id = "D29")), .Names = c("name", "people", "teacher"), id = "011", status = "-2"), 
    class = structure(list(name = list("015c"), people = list("51"), teacher = structure(list(name = list("D")), .Names = "name", id = "D23")), .Names = c("name", "people", "teacher"), id = "666", status = "-4")), 
.Names = c("class","class"), id = "2"), 
score = structure(list(
    class = structure(list(name = list("017c"), people = list("1"), teacher = structure(list(name = list("E")), .Names = "name", id = "D15")), .Names = c("name", "people", "teacher"), id = "017", status = "-2"), 
    class = structure(list(name = list("019c"), people = list("22"), teacher = structure(list(name = list("F")), .Names = "name", id = "D28")), .Names = c("name", "people", "teacher"), id = "561", status = "-4"), 
    class = structure(list(name = list("562d"), people = list("28"), teacher = structure(list(name = list("G")), .Names = "name", id = "D21")), .Names = c("name", "people", "teacher"), id = "562", status = "-4")), 
.Names = c("class", "class", "class"), id = "3")), 
.Names = c("score", "score", "score"), date = "2017-01-25"), 
schedule = structure(list(
score = structure(list(
    class = structure(list(name = list("011c"), people = list("80"), teacher = structure(list(name = list("H")), .Names = "name", id = "D47")), .Names = c("name", "people", "teacher"), id = "011", status = "-4"), 
    class = structure(list(name = list("013c"), people = list("37"), teacher = structure(list(name = list("I")), .Names = "name", id = "D18")), .Names = c("name", "people", "teacher"), id = "669", status = "-4"), 
    class = structure(list(name = list("751d"), people = list("15"), teacher = structure(list(name = list("J")), .Names = "name", id = "D61")), .Names = c("name", "people", "teacher"), id = "751", status = "-4")), 
.Names = c("class", "class", "class"), id = "1"), 
score = structure(list(
    class = structure(list(name = list("015c"), people = list("29"), teacher = structure(list(name = list("K")), .Names = "name", id = "D13")), .Names = c("name", "people", "teacher"), id = "567", status = "-2"), 
    class = structure(list(name = list("666d"), people = list("14"), teacher = structure(list(name = list("L")), .Names = "name", id = "D16")), .Names = c("name", "people", "teacher"), id = "666", status = "-4")), 
.Names = c("class", "class"), id = "2"), 
score = structure(list(
    class = structure(list(name = list("015c"), people = list("21"), teacher = structure(list(name = list("M")), .Names = "name", id = "D22")), .Names = c("name", "people", "teacher"), id = "015", status = "-4"), 
    class = structure(list(name = list("602d"), people = list("18"), teacher = structure(list(name = list("N")), .Names = "name", id = "D10")), .Names = c("name", "people", "teacher"), id = "602", status = "-4")), 
.Names = c("class", "class"), id = "3")), 
.Names = c("score", "score", "score"), date = "2017-01-26"), 
schedule = structure(list(
score = structure(list(
    class = structure(list(name = list("011c"), people = list("33"), teacher = structure(list(name = list("O")), .Names = "name", id = "D30")), .Names = c("name", "people", "teacher"), id = "011", status = "-4"), 
    class = structure(list(name = list("013c"), people = list("70"), teacher = structure(list(name = list("A")), .Names = "name", id = "D29")), .Names = c("name", "people", "teacher"), id = "601", status = "-2"), 
    class = structure(list(name = list("603d"), people = list("0"), teacher = structure(list(name = list("P")), .Names = "name", id = "D27")), .Names = c("name", "people", "teacher"), id = "603", status = "-4")), 
.Names = c("class", "class", "class"), id = "1"), 
score = structure(list(
    class = structure(list(name = list("011c"), people = list("56"), teacher = structure(list(name = list("H")), .Names = "name", id = "D47")), .Names = c("name", "people", "teacher"), id = "602", status = "-4"), 
    class = structure(list(name = list("666d"), people = list("8"), teacher = structure(list(name = list("Q")), .Names = "name", id = "D20")), .Names = c("name", "people", "teacher"), id = "666", status = "-4")), 
.Names = c("class", "class"), id = "2"), 
score = structure(list(
    class = structure(list(name = list("017c"), people = list("5"), teacher = structure(list(name = list("R")), .Names = "name", id = "D30")), .Names = c("name", "people", "teacher"), id = "017", status = "-4"), 
    class = structure(list(name = list("021c"), people = list("6"), teacher = structure(list(name = list("S")), .Names = "name", id = "D19")), .Names = c("name", "people", "teacher"), id = "561", status = "-4")), 
.Names = c("class", "class"), id = "3")), 
.Names = c("score", "score", "score"), date = "2017-01-27")), 
.Names = c("schedule", "schedule", "schedule"), from = "2017-01-25", to = "2017-01-27")), 
.Names = "scheduleList") 

Cela fait partie de z2:

$scheduleList$schedule$score$class 
$scheduleList$schedule$score$class$name 
$scheduleList$schedule$score$class$name[[1]] 
[1] "017C" 


$scheduleList$schedule$score$class$people 
$scheduleList$schedule$score$class$people[[1]] 
[1] "5" 


$scheduleList$schedule$score$class$teacher 
$scheduleList$schedule$score$class$teacher$name 
$scheduleList$schedule$score$class$teacher$name[[1]] 
[1] "R" 


attr(,"id") 
[1] "D30" 

attr(,"id") 
[1] "017" 
attr(,"status") 
[1] "-4" 

$scheduleList$schedule$score$class 
$scheduleList$schedule$score$class$name 
$scheduleList$schedule$score$class$name[[1]] 
[1] "021C" 


$scheduleList$schedule$score$class$people 
$scheduleList$schedule$score$class$people[[1]] 
[1] "6" 


$scheduleList$schedule$score$class$teacher 
$scheduleList$schedule$score$class$teacher$name 
$scheduleList$schedule$score$class$teacher$name[[1]] 
[1] "S" 


attr(,"id") 
[1] "D19" 

attr(,"id") 
[1] "561" 
attr(,"status") 
[1] "-4" 

attr(,"id") 
[1] "3" 

attr(,"date") 
[1] "2017-01-27" 

attr(,"from") 
[1] "2017-01-25" 
attr(,"to") 
[1] "2017-01-27" 

Je dois extraire les informations nécessaires de la liste imbriquée. Comme je débute, j'utilise la méthode la plus inefficace pour le faire :

# Question's attempt: walk the three nesting levels of z2 (schedule i,
# score j, class k) and gather one set of fields per class.
# NOTE(review): this block does not run as written -- see the notes below.
for (i in 1:length(z2[[1]])){    # author's note: length(z2[[1]]) = 7 
    for (j in 1:length(z2[[1]][[i]])){  # author's note: length(z2[[1]][[i]]) = 3 
    # NOTE(review): `z` on the next line is presumably a typo for `z2` --
    # every other access in this loop uses z2; confirm.
    for (k in 1:length(z[[1]][[i]][[j]])){ 
     # NOTE(review): `cbind=(` assigns to a variable named cbind instead of
     # calling cbind(), and a parenthesised comma-separated list is not a
     # valid R expression. Nothing is accumulated across iterations either.
     cbind=(
     Date=attr(z2[[1]][[i]],"date"),    # "date" attribute of schedule i 
     Score=attr(z2[[1]][[i]][[j]],"id"),   # "id" attribute of score j 
     People=z2[[1]][[i]][[j]][[k]][[2]][[1]],  # people: 2nd element of class k 
     TName=z2[[1]][[i]][[j]][[k]][[3]][[1]][[1]], # teacher name: 3rd element (teacher), then its name 
     TID=attr(z2[[1]][[i]][[j]][[k]][[3]],"id"), # teacher ID: "id" attribute of the teacher element 
     CName=z2[[1]][[i]][[j]][[k]][[1]][[1]],  # class name: 1st element of class k 
     CID=attr(z2[[1]][[i]][[j]][[k]],"id"),   # class ID: "id" attribute of class k 
     CSta=attr(z2[[1]][[i]][[j]][[k]],"status") # class status: "status" attribute of class k 
    ) 
    } 
    } 
} 

Mes boucles ne fonctionnent pas. Je voudrais obtenir le résultat sous forme de data frame ou de tableau. Voici le résultat que j'attends :

Date  Score TID TName CName CID CSta People 
2017-01-25 1  D14  B  013c 602 -4  0 
2017-01-26 2  D16  L  666d 666 -4 14 

Exemple du format XML du site :

<result status="success"> 
    <code>1</code> 
    <note>success</note> 
    <scheduleList from="2017-01-25" to="2017-01-26"> 
    <schedule date="2017-01-25"> 
     <score id="1"> 
     <class id="011" status="-4"> 
      <name>011c</name> 
      <people>116</people> 
      <teacher id="D47"> 
      <name>A</name> 
      </teacher> 
     </class> 
     <class id="669" status="-4"> 
      <name>669d</name> 
      <people>10</people> 
      <teacher id="D29"> 
      <name>B</name> 
      </teacher> 
     </class> 
     </score> 
     <score id="2"> 
     <class id="013" status="-4"> 
      <name>013c</name> 
      <people>9</people> 
      <teacher id="D9"> 
      <name>C</name> 
      </teacher> 
     </class> 
     </score> 
     <score id="3"> 
     <class id="016" status="-4"> 
      <name>016c</name> 
      <people>36</people> 
      <teacher id="D18"> 
      <name>D</name> 
      </teacher> 
     </class> 
     <class id="019" status="-4"> 
      <name>019c</name> 
      <people>9</people> 
      <teacher id="D30"> 
      <name>E</name> 
      </teacher> 
     </class> 
     </score> 
    </schedule> 
    <schedule date="2017-01-26"> 
     <score id="1"> 
     <class id="011" status="-2"> 
      <name>011c</name> 
      <people>2</people> 
      <teacher id="D29"> 
      <name>F</name> 
      </teacher> 
     </class> 
     <class id="013" status="-2"> 
      <name>013c</name> 
      <people>0</people> 
      <teacher id="D14"> 
      <name>G</name> 
      </teacher> 
     </class> 
     </score> 
     <score id="2"> 
     <class id="011" status="-2"> 
      <name>011c</name> 
      <people>4</people> 
      <teacher id="D29"> 
      <name>F</name> 
      </teacher> 
     </class> 
     </score> 
     <score id="3"> 
     <class id="017" status="-2"> 
      <name>017c</name> 
      <people>1</people> 
      <teacher id="D141"> 
      <name>H</name> 
      </teacher> 
     </class> 
     <class id="019" status="-4"> 
      <name>019c</name> 
      <people>22</people> 
      <teacher id="D291"> 
      <name>I</name> 
      </teacher> 
     </class> 
     <class id="020" status="-4"> 
      <name>020c</name> 
      <people>8</people> 
      <teacher id="D143"> 
      <name>J</name> 
      </teacher> 
     </class> 
     </score> 
    </schedule> 
    </scheduleList> 
</result> 

Code:

# Fill in and submit the site's form (date range and credentials), then parse
# the response as XML.
# NOTE(review): the session/form helpers look like rvest functions and
# read_xml() like xml2; the library() calls are not shown in this snippet.
url <- "xxxxxxx"  # placeholder for the real address
session <- html_session(url)
form <- html_form(read_html(url))[[1]]  # first form found on the page
filled_form <- set_values(
  form,
  "fromDate" = "2017-01-25",
  "toDate" = "2017-01-26",
  "userid" = "xxx",
  "Password" = "aaa"
)
s <- submit_form(session, filled_form)
z <- read_xml(s$response)  # `<-` for assignment, like the rest of the script
+0

En utilisant directement un paquet comme 'xml2', vous pouvez extraire les données du XML sans le convertir en liste. Avez-vous une URL ou un exemple reproductible ? Je pourrais vous montrer. – cderv

+0

@cderv Bonjour ! Je viens de l'ajouter ci-dessus : j'ai copié une partie du XML du site Web. Est-ce reproductible ? Les éléments extérieurs du XML, comme le résultat, le code et la note, sont des choses dont je n'ai pas besoin. J'aimerais vraiment apprendre de vous :) – Ching

Répondre

3

Vous n'assignez pas le résultat de cbind. (Et il est mal utilisé : n'écrivez pas cbind=something, le signe égal est une erreur.)
Ceci est une façon rapide et peut-être inefficace de le faire.

# Flatten the nested list `z2` into a data frame with one row per class.
# Nesting levels: z2[[1]] (scheduleList) > schedule > score > class.
# Rows are collected in a list and bound once at the end, rather than calling
# rbind() on a growing data frame inside the innermost loop.
schedules <- z2[[1]]
rows <- list()

# seq_along() yields an empty sequence for an empty list, whereas
# 1:length(x) would iterate over c(1, 0).
for (i in seq_along(schedules)) {
  schedule <- schedules[[i]]
  for (j in seq_along(schedule)) {
    score <- schedule[[j]]
    for (k in seq_along(score)) {
      class_k <- score[[k]]
      teacher <- class_k[[3]]
      rows[[length(rows) + 1L]] <- data.frame(
        Date = attr(schedule, "date"),    # date
        Score = attr(score, "id"),        # score
        People = class_k[[2]][[1]],       # people
        TName = teacher[[1]][[1]],        # teacher name
        TID = attr(teacher, "id"),        # teacher ID
        CName = class_k[[1]][[1]],        # class name
        CID = attr(class_k, "id"),        # class ID
        CSta = attr(class_k, "status"),   # class status
        stringsAsFactors = FALSE          # keep every column as character
      )
    }
  }
}

# do.call(rbind, list()) returns NULL, so fall back to an empty data frame
# when there was nothing to collect.
result <- if (length(rows) > 0) do.call(rbind, rows) else data.frame()

head(result) 
     Date Score People TName TID CName CID CSta 
1 2017-01-25  1  2  A D29 011c 011c -2 
2 2017-01-25  1  0  B D14 013 602d -4 
3 2017-01-25  1  6  C D31 603 603 -4 
4 2017-01-25  2  4  A D29 011c 011 -2 
5 2017-01-25  2  51  D D23 015c 666 -4 
6 2017-01-25  3  1  E D15 017c 017 -2 
+0

Super ! Je sais que c'est très inefficace, mais c'est plus facile pour moi d'apprendre ainsi. Merci beaucoup :D Si vous avez d'autres suggestions, n'hésitez pas à me le faire savoir. – Ching

1

Utiliser les paquets purrr et dplyr du tidyverse pourrait aider à cette tâche :

# Build one row per class from the nested list, using purrr and dplyr.
# The inner function handles one score (a list of classes); the outer one
# handles one schedule (a list of scores). Named function arguments replace
# the nested `~ .x` lambdas so the two levels cannot be confused, and
# tibble() replaces the deprecated data_frame().
z2$scheduleList %>%
  map_df(function(schedule) {
    map_df(schedule, function(score) {
      tibble(
        TID = map_chr(score, list("teacher", attr_getter("id"))),
        TName = map_chr(score, list("teacher", "name", 1)),
        CName = map_chr(score, list("name", 1)),
        CID = map_chr(score, list(attr_getter("id"))),
        Csta = map_chr(score, list(attr_getter("status"))),
        People = map_chr(score, list("people", 1))
      ) %>%
        # the score id is shared by every class of this score
        mutate(Score = attr(score, "id"))
    }) %>%
      # the schedule date is shared by every score of this schedule
      mutate(Date = attr(schedule, "date"))
  }) %>%
  select(Date, Score, everything())

#> # A tibble: 22 x 8 
#>   Date Score TID TName CName CID Csta People 
#>   <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> 
#> 1 2017-01-25  1 D29  A 011c 011c -2  2 
#> 2 2017-01-25  1 D14  B 013 602d -4  0 
#> 3 2017-01-25  1 D31  C 603 603 -4  6 
#> 4 2017-01-25  2 D29  A 011c 011 -2  4 
#> 5 2017-01-25  2 D23  D 015c 666 -4  51 
#> 6 2017-01-25  3 D15  E 017c 017 -2  1 
#> 7 2017-01-25  3 D28  F 019c 561 -4  22 
#> 8 2017-01-25  3 D21  G 562d 562 -4  28 
#> 9 2017-01-26  1 D47  H 011c 011 -4  80 
#> 10 2017-01-26  1 D18  I 013c 669 -4  37 
#> # ... with 12 more rows 

Je ne sais pas si cette approche est plus efficace, mais elle pourrait être plus claire à lire et à comprendre.

Très bon cas d'utilisation pour comprendre purrr cependant.

+0

Plus efficace et fonctionne parfaitement! Je vous remercie! : D – Ching

+0

Heureux que ça aide! 'purrr' est puissant quand il est bien compris. En outre, je travaille directement sur un exemple pour vous avec la manipulation de xml. – cderv