Generate a test dataset (from https://kbroman.org/blog/2017/05/11/reading/writing-biggish-data-revisited/):
# Problem size: number of individuals (rows) and SNPs (columns per signal).
n_ind <- 500
n_snps <- 1e5 / 5

# Labels: "ind1", "ind2", ... for rows and "snp1", "snp2", ... for SNPs.
ind_names <- paste0("ind", seq_len(n_ind))
snp_names <- paste0("snp", seq_len(n_snps))

# Two matrices of standard-normal draws, one row per individual.
# sigX is drawn before sigY so the random stream is consumed in the same order.
sigX <- matrix(
  rnorm(n_ind * n_snps),
  nrow = n_ind,
  dimnames = list(ind_names, paste0(snp_names, ".X"))
)
sigY <- matrix(
  rnorm(n_ind * n_snps),
  nrow = n_ind,
  dimnames = list(ind_names, paste0(snp_names, ".Y"))
)

# Wide data frame: an `id` column followed by all .X and then all .Y columns.
db <- cbind(
  data.frame(id = ind_names, stringsAsFactors = FALSE),
  sigX,
  sigY
)

# Output path shared by the write benchmarks below.
csv_file <- "db.csv"
fwrite() test
library(data.table)
# Time data.table::fwrite() writing `db` to `csv_file` with a single thread.
system.time(fwrite(db, file = csv_file, nThread = 1))
## user system elapsed
## 7.553 0.223 7.797
# Time the same fwrite() call with eight threads for comparison.
system.time(fwrite(db, file = csv_file, nThread = 8))
## user system elapsed
## 8.251 0.316 4.678
readr::write_csv() test
API docs: https://readr.tidyverse.org/reference/write_delim.html
library(readr)
# Time readr::write_csv() writing `db` to `csv_file`.
# The destination is passed positionally: readr 1.4.0 renamed the second
# argument from `path` to `file`, so naming it `path` triggers a deprecation
# warning on current readr, while naming it `file` fails on older releases.
system.time(
  write_csv(db, csv_file)
)
## user system elapsed
## 15.126 9.845 25.219
# Clean up: delete the benchmark CSV if one of the writes above created it.
if (file.exists(csv_file)) {
  file.remove(csv_file)
}
## [1] TRUE