Trying to answer a Stack Overflow question: https://stackoverflow.com/questions/51020513/r-nested-list-to-wide-dataframe
# Deeply nested example (taken from stackoverflow.com): an unnamed list with
# one record per id. Each record holds a `uuid` and, three levels down, two
# parallel character vectors (website type and website url).
x <- list(
  list(
    uuid = "123",
    relationships = list(
      websites = list(
        items = list(
          properties.website_type = c(
            "homepage", "facebook", "twitter", "linkedin"
          ),
          properties.url = c(
            "www.example1.com", "www.fbex1.com",
            "www.twitterex1.com", "www.linkedinex1.com"
          )
        )
      )
    )
  ),
  list(
    uuid = "987",
    relationships = list(
      websites = list(
        items = list(
          properties.website_type = c("homepage", "facebook", "twitter"),
          properties.url = c(
            "www.example2.com", "www.fbex2.com", "www.twitterex2.com"
          )
        )
      )
    )
  )
)
str(x)
#> List of 2
#> $ :List of 2
#> ..$ uuid : chr "123"
#> ..$ relationships:List of 1
#> .. ..$ websites:List of 1
#> .. .. ..$ items:List of 2
#> .. .. .. ..$ properties.website_type: chr [1:4] "homepage" "facebook" "twitter" "linkedin"
#> .. .. .. ..$ properties.url : chr [1:4] "www.example1.com" "www.fbex1.com" "www.twitterex1.com" "www.linkedinex1.com"
#> $ :List of 2
#> ..$ uuid : chr "987"
#> ..$ relationships:List of 1
#> .. ..$ websites:List of 1
#> .. .. ..$ items:List of 2
#> .. .. .. ..$ properties.website_type: chr [1:3] "homepage" "facebook" "twitter"
#> .. .. .. ..$ properties.url : chr [1:3] "www.example2.com" "www.fbex2.com" "www.twitterex2.com"
Lists can be subset by index, by name, or by a logical statement.
# Structure of the nested list (each level is a list wrapping the next one):
# list x     = [id1, id2]
# list id1   = [uuid, relationships]
# list rel   = [websites]
# list web   = [items]
# list items = [character vector, character vector]
str(x)
length(x) # 2
mode(x)
class(x)
# Indexing
# Name the top-level elements so they can also be selected by name
names(x)
names(x) <- c("id1", "id2")
names(x)
# `[` returns the 1st element still wrapped in a list (keeps the box)
x1_l <- x[1]
str(x1_l)
# `[[` and `$` return the 1st element itself (taken out of the box)
x1_e <- x$id1 # here we went down one level
str(x1_e)
x1_e <- x[[1]] # same result by position: one level down
str(x1_e)
# To get the CONTENT of the 1st component of the 1st element, chain `[[`
x1_e_1 <- x[[1]][[1]]
str(x1_e_1) # char "123"
# `[` with a name also keeps the enclosing list
x["id2"]
str(x["id2"]) # list of 1
# Selecting more than one element is only possible with `[` (result is a list)
x[c("id1","id2")] # List of 2
str(x[c("id1","id2")]) # list
# -- OR -- step by step
# Pull out the CONTENT of the 2nd component (`relationships`) of the 1st element
x1_e_2 <- x[[1]][["relationships"]]
str(x1_e_2)
# list
# $websites
# $websites$items
# $websites$items$properties.website_type
# [1] "homepage" "facebook" "twitter" "linkedin"
#
# $websites$items$properties.url
# [1]"www.example1.com""www.fbex1.com""www.twitterex1.com""www.linkedinex1.com"

# Step 1: turn it into a tibble whose single column "websites" is a list-column
x1_e_2_t <- as_tibble(x1_e_2)
str(x1_e_2_t) # tibble [1 x 1], the column is a list of lists

# Step 2: widen that column --> 2 list-columns:
# 'properties.website_type' and 'properties.url'
x1_e_2_t <- tidyr::unnest_wider(x1_e_2_t, col = websites)
x1_e_2_t # tibble: 1 x 2 (both list-columns)

# Step 3: lengthen both columns together
# --> 4 rows: "homepage" "facebook" "twitter" "linkedin"
x1_e_2_t <- tidyr::unnest_longer(
  x1_e_2_t,
  col = c(properties.website_type, properties.url)
)
x1_e_2_t # --> tibble: 4 x 2

# -- OR -- the same three steps as a single pipeline
x1_e_2_t <- x1_e_2 %>%
  # first-level list -> tibble with 1 list-column  --> tibble [1 x 1]
  as_tibble() %>%
  # split into two columns                         --> tibble: 1 x 2
  tidyr::unnest_wider(col = websites) %>%
  # split into four rows                           --> tibble: 4 x 2
  tidyr::unnest_longer(col = c(properties.website_type, properties.url))
str(x1_e_2_t)
# Convert the list to a data frame in long format: every id is turned into its
# own data frame (the scalar `uuid` is recycled against the website vectors),
# and the per-id frames are then stacked with `rbind`.
x_df <- do.call(
  what = rbind,
  args = lapply(X = x, FUN = data.frame, stringsAsFactors = FALSE)
)
x_df # 7 x 3
#> uuid relationships.websites.items.properties.website_type
#> id1.1 123 homepage
#> id1.2 123 facebook
#> id1.3 123 twitter
#> id1.4 123 linkedin
#> id2.1 987 homepage
#> id2.2 987 facebook
#> id2.3 987 twitter
#> relationships.websites.items.properties.url
#> id1.1 www.example1.com
#> id1.2 www.fbex1.com
#> id1.3 www.twitterex1.com
#> id1.4 www.linkedinex1.com
#> id2.1 www.example2.com
#> id2.2 www.fbex2.com
#> id2.3 www.twitterex2.com
# Final result: one row per uuid, one column per website type.
# `tidyr::spread()` is superseded, so the reshape uses `tidyr::pivot_wider()`.
# `names_sort = TRUE` keeps the new columns in alphabetical order, which is the
# column order `spread()` produced (see the output below).
x_df_spread <- x_df %>%
  tidyr::pivot_wider(
    names_from = relationships.websites.items.properties.website_type,
    values_from = relationships.websites.items.properties.url,
    names_sort = TRUE
  )
x_df_spread # 2 x 5
# NOTE(review): the values below are unchanged; the printed header/row labels
# may differ slightly if the result is returned as a tibble -- confirm.
#> uuid facebook homepage linkedin
#> 1 123 www.fbex1.com www.example1.com www.linkedinex1.com
#> 2 987 www.fbex2.com www.example2.com <NA>
#> twitter
#> 1 www.twitterex1.com
#> 2 www.twitterex2.com
names(x) <- c("id1", "id2")
view(x)
# in steps
# x_df <- x %>% as_tibble_col( .)
# str(x_df)
# x_df <- x_df %>% unnest_wider(col = "value") # 1
# str(x_df)
# x_df <- x_df %>% unnest_longer(col = "relationships") # 2
# str(x_df)
# x_df <- x_df %>% unnest_wider(col = "relationships") # 2
# str(x_df)
# x_df <- x_df %>% unnest_wider(col = "items") # 2
# str(x_df)
# x_df <- x_df %>%
# unnest_longer(col = c("properties.website_type", "properties.url"))
# str(x_df)
# --- Iterations of unnest: peel off one nesting level per step
x_df2 <- tibble::as_tibble_col(x) %>%
  # one row per id; `uuid` and `relationships` become columns
  tidyr::unnest_wider(col = value) %>%
  # one row per entry of `relationships`; its name ("websites") is kept in
  # the new `relationships_id` column
  tidyr::unnest_longer(col = relationships) %>%
  # `relationships` is unnested a second time because, after the step above,
  # each value is still a list (holding `items`): this turns it into a column
  tidyr::unnest_wider(col = relationships) %>%
  # `items` -> the two `properties.*` list-columns
  tidyr::unnest_wider(col = items) %>%
  # one row per website
  tidyr::unnest_longer(col = c(properties.website_type, properties.url)) %>%
  # --- Lastly, one row per id and one column per website type.
  # No `group_by(uuid)` is needed: `pivot_wider()` already uses the remaining
  # columns (`uuid`, `relationships_id`) to identify rows, and grouping would
  # leave the result as a grouped tibble that is never ungrouped.
  tidyr::pivot_wider(
    names_from = properties.website_type,
    values_from = properties.url
  )
x_df2
#> # A tibble: 2 × 6
#> uuid relationships_id homepage facebook twitter linkedin
#> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 123 websites www.example1.com www.fbex1.… www.tw… www.lin…
#> 2 987 websites www.example2.com www.fbex2.… www.tw… <NA>
# Alternative attempt with data.table.
# starting point
str(x$id1) # 2nd level list is a `list`
dt_list <- purrr::map(x, data.table::as.data.table)
str(dt_list$id1) # 2nd level list is now a `data.table`
# Stack the per-id tables; the 2nd COLUMN is a list-column.
# `TRUE`/`FALSE` are spelled out because `T`/`F` are ordinary variables that
# can be reassigned.
dt <- data.table::rbindlist(
  dt_list,
  use.names = TRUE, fill = TRUE, idcol = FALSE
)
# uuid relationships
# 1: 123 <list[1]>
# 2: 987 <list[1]>
# Flatten everything into a character matrix with one row per record.
# `nrow(dt)` is used for the row count: `length(dt)` is the number of COLUMNS
# and only matched the number of rows here by coincidence.
# NOTE(review): variable names are lost, and because the matrix is filled
# column-wise the values of one record are probably not kept on one row --
# confirm before relying on `dt_df`.
dt_df <- data.frame(matrix(unlist(dt), nrow = nrow(dt), byrow = FALSE))
# A list of 2 records; `values` has 3 observations in the first record and 2
# in the second.
nested_l <- list(
  list(id = "a", values = c(1, 2, 3)),
  list(id = "b", values = c(78, 99))
)
str(nested_l)
#> List of 2
#> $ :List of 2
#> ..$ id : chr "a"
#> ..$ values: num [1:3] 1 2 3
#> $ :List of 2
#> ..$ id : chr "b"
#> ..$ values: num [1:2] 78 99
# 1) TRANSFORM TO DF WITH LIST-COLUMNS
# 2 x 2: the records are row-bound into a list matrix first, so the cells of
# the resulting data frame are list elements
df_l <- as.data.frame(do.call(what = rbind, args = nested_l))
df_l
#> id values
#> 1 a 1, 2, 3
#> 2 b 78, 99
# 2.a) TRANSFORM TO DF WITH COLUMN-vector
# 5 obs x 2: `values` is unnested into a plain numeric column, while `id` is
# still a list-column (see the <list> type in the output below)
df <- nested_l %>%
  do.call(what = rbind) %>%
  as.data.frame() %>%
  tidyr::unnest(cols = values)
df
#> # A tibble: 5 × 2
#> id values
#> <list> <dbl>
#> 1 <chr [1]> 1
#> 2 <chr [1]> 2
#> 3 <chr [1]> 3
#> 4 <chr [1]> 78
#> 5 <chr [1]> 99
# 2.b) TRANSFORM TO DF WITH COLUMN-vector
# 5 obs x 2, both columns atomic
# `map_dfr` returns a data frame built by row-binding the per-element results
# (`map_dfc` would column-bind them)
df_purr <- purrr::map_dfr(
  nested_l,
  function(el) tidyr::unnest(data.frame(el), cols = c(id, values))
)
df_purr
#> # A tibble: 5 × 2
#> id values
#> <chr> <dbl>
#> 1 a 1
#> 2 a 2
#> 3 a 3
#> 4 b 78
#> 5 b 99
# 2.c) TRANSFORM TO DF WITH COLUMN-vector
# No unnesting is actually needed: row-binding the records as they are gives
# the same 5 x 2 result
df_purr2 <- purrr::map_dfr(nested_l, identity)
df_purr2
#> # A tibble: 5 × 2
#> id values
#> <chr> <dbl>
#> 1 a 1
#> 2 a 2
#> 3 a 3
#> 4 b 78
#> 5 b 99
# HARDER ------------------------------------------------------------------
# Level 3: a single analysis. `key4` and `key6` are vectors whose lengths
# differ from each other (and `key6` differs between analyses).
level3.list1 <- list(
  key1 = "name 1",
  key2 = 14,
  key3 = 15,
  key4 = c(15, 29, 43, 57),
  key5 = 332,
  key6 = c("A", "B", "C")
)
level3.list2 <- list(
  key1 = "name 2",
  key2 = 28,
  key3 = 15,
  key4 = c(15, 43, 71, 99),
  key5 = 332,
  key6 = c("Y", "Z")
)
level3.list3 <- list(
  key1 = "name 3",
  key2 = 56,
  key3 = 22,
  key4 = c(78, 134, 190, 246),
  key5 = 112,
  key6 = c("V")
)
# Level 2: analyses grouped per sampling (two for the first, one for the second).
level2.list1 <- list(level3.list1, level3.list2)
level2.list2 <- list(level3.list3)
# Level 1: a planned sampling, carrying its group of analyses.
level1.list1 <- list(
  code = "ABC123",
  type = "spot",
  matrix = "water",
  analyses = level2.list1
)
level1.list2 <- list(
  code = "GHS332",
  type = "mix",
  matrix = "water",
  analyses = level2.list2
)
# Level 0: all the samplings.
level0.list <- list(level1.list1, level1.list2)
str(level0.list)
#> List of 2
#> $ :List of 4
#> ..$ code : chr "ABC123"
#> ..$ type : chr "spot"
#> ..$ matrix : chr "water"
#> ..$ analyses:List of 2
#> .. ..$ :List of 6
#> .. .. ..$ key1: chr "name 1"
#> .. .. ..$ key2: num 14
#> .. .. ..$ key3: num 15
#> .. .. ..$ key4: num [1:4] 15 29 43 57
#> .. .. ..$ key5: num 332
#> .. .. ..$ key6: chr [1:3] "A" "B" "C"
#> .. ..$ :List of 6
#> .. .. ..$ key1: chr "name 2"
#> .. .. ..$ key2: num 28
#> .. .. ..$ key3: num 15
#> .. .. ..$ key4: num [1:4] 15 43 71 99
#> .. .. ..$ key5: num 332
#> .. .. ..$ key6: chr [1:2] "Y" "Z"
#> $ :List of 4
#> ..$ code : chr "GHS332"
#> ..$ type : chr "mix"
#> ..$ matrix : chr "water"
#> ..$ analyses:List of 1
#> .. ..$ :List of 6
#> .. .. ..$ key1: chr "name 3"
#> .. .. ..$ key2: num 56
#> .. .. ..$ key3: num 22
#> .. .. ..$ key4: num [1:4] 78 134 190 246
#> .. .. ..$ key5: num 112
#> .. .. ..$ key6: chr "V"
Using `purrr::map_dfr` returns a data frame created by row-binding (the last column is a list-column).
# "Flattening" only the first level by row-binding the samplings as they are
# 3 x 4, where the last column (`analyses`) is a list-column
df_l.lev2 <- purrr::map_dfr(level0.list, identity)
Using `tibble` + `unnest_*` returns a data frame containing only atomic vectors.
# 1) Step by step: each step removes one level of nesting
# u1: one row per sampling; code / type / matrix / analyses become columns
u1 <- tibble(level0.list) %>% unnest_wider(col = 1)
# u2: one row per analysis
u2 <- unnest_longer(u1, col = analyses)
# u3: the keys of each analysis become columns
u3 <- unnest_wider(u2, col = analyses)
# u4: one row per value of `key4`
u4 <- unnest_longer(u3, col = key4)
# u5: one row per value of `key6`
u5 <- unnest_longer(u4, col = key6)
# 2) All in one with piping
# Columns are selected by name here; selecting them by position
# (1, 4, 4, 7, 9 in this order) is a quicker way to pick the same columns.
u <- tibble(level0.list) %>%
  unnest_wider(level0.list) %>% # u1
  unnest_longer(analyses) %>%   # u2
  unnest_wider(analyses) %>%    # u3
  unnest_longer(key4) %>%       # u4
  unnest_longer(key6)           # u 24 obs X 9 var