如何在R的循环中创建基于文件名的名称的数据框?

时间:2020-03-11 03:53:17

标签: r loops dataframe for-loop

我正在尝试创建一个函数,该函数循环遍历目录中的文件,为每个文件创建一个以文件名命名的数据框,并将新创建的数据框写入 .csv 文件。但是,我写的代码只对目录中的最后一个文件生效,而不是对所有文件生效。有人可以指出我的错误吗?

 ##Peak annotation function for all the files in the directory 
 ##Loading required libraries 
 library(GenomicFeatures)
 library(GenomicRanges)
 library(AnnotationDbi)
 library(ChIPseeker)
 ## Build the hg38 transcript database from the GTF file, store it on disk as
 ## an SQLite file, and load that stored copy as the annotation source.
 gtf_path <- "/Users/dari_obukhova/data/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf"
 txdb_path <- "/Users/dari_obukhova/data/Homo_sapiens/UCSC/hg38/Annotation/TxDB.sqlite"
 hg38_TxDB <- makeTxDbFromGFF(gtf_path)
 saveDb(hg38_TxDB, file = txdb_path)
 hg38_Annotation <- loadDb(txdb_path)

 ## Annotate every peak file in the directory and collect the results in a list.
 ## NOTE(review): `pattern` is a regular expression, so ".xls" matches any
 ## character followed by "xls" anywhere in the file name.
 all_peaks_files <- list.files(pattern = ".xls", full.names = T)
 for (file in all_peaks_files) {
      ## NOTE(review): the list is re-created on every iteration, so only the
      ## element added by the final iteration is still present after the loop.
      peak_list <- list()
      ## NOTE(review): `fileName` is not defined in this snippet; the loop
      ## variable is `file`.
      peaks_df <- read.delim(fileName, comment.char = "#")
      ## Convert the peak table to a GRanges object, keeping the extra columns.
      peaks_gr <- makeGRangesFromDataFrame(peaks_df, keep.extra.columns = T)
      ## Annotate the peaks against the loaded TxDb and convert back to a data frame.
      annotated_peaks_df <- as.data.frame(annotatePeak(peaks_gr, TxDb = hg38_Annotation))
      ## Append the annotated data frame to the end of the list.
      peak_list[[(length(peak_list) + 1)]] <- annotated_peaks_df
  }
  ## Display the collected list.
  peak_list

但是,返回的列表只有一个元素(即 all_peaks_files 中的最后一个文件)。而且,我不确定如何根据每个文件的文件名来命名我创建的每个数据框。我将不胜感激任何建议! <3

1 个答案:

答案 0 :(得分:0)

谢谢大家的好建议。我已经把代码改成了可以正常运行的版本,修改后的代码如下。

##Function to annotate some files with peaks generated by CHIPSeq

##Loading necessary libraries 
library(GenomicFeatures)
library(GenomicRanges)
library(AnnotationDbi)
library(ChIPseeker)

## Read one peak table, then prepare the genome annotation.
## NOTE(review): `hESCpeaks_df` is not referenced again in the visible script.
hESCpeaks_df <- read.delim("H3K27ac_hESC_peaks.xls", comment.char = "#")

## Build the hg38 transcript database from the GTF file, store it on disk as
## an SQLite file, and load that stored copy as the annotation source.
gtf_path <- "/Users/dari_obukhova/data/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf"
txdb_path <- "/Users/dari_obukhova/data/Homo_sapiens/UCSC/hg38/Annotation/TxDB.sqlite"
hg38_TxDB <- makeTxDbFromGFF(gtf_path)
saveDb(hg38_TxDB, file = txdb_path)
hg38_Annotation <- loadDb(txdb_path)

## Annotate every peak file in the working directory.
## `pattern` is a regular expression, so the dot is escaped and the match is
## anchored to the end of the name; a bare ".xls" would also match names such
## as "axls.txt".
all_peaks_files <- list.files(pattern = "\\.xls$")

## One slot per input file, named after the file, so each annotated data frame
## can be retrieved with peak_list[["<file name>"]].
peak_list <- vector("list", length(all_peaks_files))
names(peak_list) <- all_peaks_files

for (i in seq_along(all_peaks_files)) {
  file <- all_peaks_files[[i]]

  ## "#" starts a comment in the input files.
  peaks_df <- read.delim(file, comment.char = "#")
  peaks_gr <- makeGRangesFromDataFrame(peaks_df, keep.extra.columns = TRUE)
  annotated_peaks_df <- as.data.frame(
    annotatePeak(peaks_gr, TxDb = hg38_Annotation)
  )

  peak_list[[i]] <- annotated_peaks_df
  ## Also expose the result as a variable named after the file, as the script
  ## did before; such names must be backtick-quoted when used directly.
  assign(file, annotated_peaks_df)
}
peak_list

相关问题