diff --git a/assets/dep_lncselect.R b/assets/dep_lncselect.R
new file mode 100644
index 0000000..d33e919
--- /dev/null
+++ b/assets/dep_lncselect.R
@@ -0,0 +1,298 @@
+ #!/usr/bin/env Rscript
+    
+    # Carregar bibliotecas necessárias
+    library(tidyverse)
+    library(rtracklayer)
+
+    # Obter variáveis de ambiente
+    annotated_gtf <- Sys.getenv("ANNOTATED_GTF")
+    cpc2_results <- Sys.getenv("CPC2_RESULTS")
+    plek_results <- Sys.getenv("PLEK_RESULTS")
+    pfam_domtblout <- Sys.getenv("PFAM_DOMTBLOUT")
+    reference_gtf <- Sys.getenv("REFERENCE_GTF")
+    prefix <- Sys.getenv("PREFIX")
+    min_length <- as.numeric(Sys.getenv("MIN_LENGTH"))
+    pfam_evalue <- as.numeric(Sys.getenv("PFAM_EVALUE"))
+
+    # Configurar log
+    log_file <- paste0(prefix, ".log")
+    sink(log_file, append = TRUE, split = TRUE)
+    
+    cat("=== SELEÇÃO DE lncRNAs ===\n")
+    cat("Iniciando seleção de lncRNAs e combinação com anotação de referência\n")
+    cat("Data/hora de início:", as.character(Sys.time()), "\n")
+    cat("\n")
+
+    # Ler arquivos de entrada
+    cat("Lendo arquivos de entrada...\n")
+    cat("  GTF anotado:", annotated_gtf, "\n")
+    cat("  Resultados CPC2:", cpc2_results, "\n")
+    cat("  Resultados PLEK:", plek_results, "\n")
+    cat("  Domínios Pfam:", pfam_domtblout, "\n")
+    cat("  GTF de referência:", reference_gtf, "\n")
+    cat("\n")
+
+    # Carregar GTF anotado
+    dup_reference_annot <- as.data.frame(rtracklayer::import(annotated_gtf))
+    cat("GTF anotado carregado:", nrow(dup_reference_annot), "linhas\n")
+
+    # Carregar resultados CPC2
+    CPC2 <- read.delim(cpc2_results)
+    colnames(CPC2) <- c("transcript_id", "transcript_length", "peptide_length", "Fickett_score",
+                        "pI", "ORF_integrity", "coding_probability", "CPC2")
+    cat("Resultados CPC2 carregados:", nrow(CPC2), "transcritos\n")
+    cat("  Codificantes:", sum(CPC2$CPC2 == "coding"), "\n")
+    cat("  Não-codificantes:", sum(CPC2$CPC2 == "noncoding"), "\n")
+
+    # Carregar resultados PLEK
+    PLEK <- read.delim(plek_results, header = FALSE)
+    PLEK <- PLEK %>% mutate(transcript_id = str_extract(V3, "(?<=\\>).*?(?= loc:)"))
+    colnames(PLEK) <- c("PLEK", "value", "features", "transcript_id")
+    cat("Resultados PLEK carregados:", nrow(PLEK), "transcritos\n")
+    cat("  Codificantes:", sum(PLEK$PLEK == "Coding"), "\n")
+    cat("  Não-codificantes:", sum(PLEK$PLEK == "Non-coding"), "\n")
+
+    # Carregar domínios Pfam
+    pfam <- readr::read_table(pfam_domtblout, comment = "#", col_names = FALSE)
+    cat("Domínios Pfam carregados:", nrow(pfam), "hits\n")
+
+    # Identificar transcritos com domínios significativos
+    transcripts_with_domain <- as.data.frame(pfam) %>%
+    dplyr::filter(X7 <= pfam_evalue) %>%
+    mutate(transcript_id = str_remove(X1, "\\.p[0-9]+$")) %>%
+    distinct(transcript_id) %>%
+    mutate(domain_present = "yes")
+
+    cat("Transcritos com domínios Pfam significativos (E-value <=", pfam_evalue, "):", nrow(transcripts_with_domain), "\n")
+    
+    # Combinar todos os dados
+    cat("\nCombinando dados...\n")
+    gtf_complete <- merge(dup_reference_annot, transcripts_with_domain, by = "transcript_id", all.x = TRUE)
+    gtf_complete <- merge(gtf_complete, PLEK[,c("transcript_id", "PLEK")], by = "transcript_id", all.x = TRUE)
+    gtf_complete <- merge(gtf_complete, CPC2[,c("transcript_id", "transcript_length", "peptide_length", "CPC2")], by = "transcript_id", all.x = TRUE)
+    
+    cat("GTF completo criado:", nrow(gtf_complete), "linhas\n")
+    
+    # Aplicar filtros para seleção de lncRNAs
+    cat("\nAplicando filtros para seleção de lncRNAs...\n")
+    cat("Critérios:\n")
+    cat("  - class_code em: x, i, u\n")
+    cat("  - Comprimento >= min_length nt\n")
+    cat("  - PLEK == 'Non-coding' E CPC2 == 'noncoding'\n")
+    cat("  - Sem domínios Pfam significativos\n")
+    cat("\n")
+    
+    # Contar transcritos em cada etapa do filtro
+    step1 <- gtf_complete %>% dplyr::filter(class_code %in% c("x", "i", "u"))
+    cat("Após filtro class_code:", length(unique(step1$transcript_id)), "transcritos únicos\n")
+    
+    step2 <- step1 %>% dplyr::filter(transcript_length >= min_length)
+    cat("Após filtro comprimento:", length(unique(step2$transcript_id)), "transcritos únicos\n")
+    
+    step3 <- step2 %>% dplyr::filter(PLEK == "Non-coding" & CPC2 == "noncoding")
+    cat("Após filtros CPC2 e PLEK:", length(unique(step3$transcript_id)), "transcritos únicos\n")
+    
+    step4 <- step3 %>% dplyr::filter(is.na(domain_present))
+    cat("Após filtro domínios Pfam:", length(unique(step4$transcript_id)), "transcritos únicos\n")
+    
+    # Selecionar lncRNAs finais
+    lncRNA <- step4 %>% pull(transcript_id) %>% unique()
+    cat("\nlncRNAs selecionados:", length(lncRNA), "transcritos\n")
+    
+    # Criar GTF de lncRNAs
+    gtf_lncRNA <- gtf_complete %>% 
+      filter(transcript_id %in% lncRNA) %>%
+      mutate(transcript_biotype = ifelse(type == "transcript", "lncRNA", ""))
+    
+    cat("GTF lncRNA criado:", nrow(gtf_lncRNA), "linhas\n")
+    
+    # Verificar genes com diferentes class_codes
+    cat("\nVerificando genes com diferentes class_codes...\n")
+    lncRNAs_check <- gtf_lncRNA %>%
+      dplyr::filter(type == "transcript") %>%
+      dplyr::select(class_code, gene_id) %>%
+      distinct()
+    
+    duplicated_genes <- nrow(lncRNAs_check) - length(unique(gtf_lncRNA$gene_id))
+    cat("Genes com diferentes class_codes:", duplicated_genes, "\n")
+    
+    # Exportar GTFs intermediários
+    cat("\nExportando GTFs...\n")
+    rtracklayer::export(gtf_lncRNA, "gtf_lncRNA.gtf")
+    rtracklayer::export(gtf_complete, "gtf_complete.gtf")
+    cat("  gtf_lncRNA.gtf exportado\n")
+    cat("  gtf_complete.gtf exportado\n")
+    
+    # Combinar com anotação de referência
+    cat("\nCombinando com anotação de referência...\n")
+    reference <- as.data.frame(rtracklayer::import(reference_gtf))
+    reference_filter <- reference %>% filter(type %in% c("transcript", "exon"))
+    cat("Referência carregada:", nrow(reference_filter), "linhas (transcript + exon)\n")
+    
+    combined_stringtie_ncbi <- dplyr::bind_rows(reference_filter, gtf_lncRNA)
+
+    combined_stringtie_ncbi$Dbxref <- sapply(combined_stringtie_ncbi$Dbxref, function(x) {
+      if (is.null(x)) return(NA_character_)
+      paste(x, collapse = ",")
+    })
+
+    combined_stringtie_ncbi$end_range <- sapply(combined_stringtie_ncbi$end_range, function(x) {
+      if (is.null(x)) return(NA_character_)
+      paste(x, collapse = ",")
+    })
+
+    combined_stringtie_ncbi$start_range <- sapply(combined_stringtie_ncbi$start_range, function(x) {
+      if (is.null(x)) return(NA_character_)
+      paste(x, collapse = ",")
+    })
+
+    combined_stringtie_ncbi$Parent <- sapply(combined_stringtie_ncbi$Parent, function(x) {
+      if (is.null(x)) return(NA_character_)
+      paste(x, collapse = ",")
+    })
+
+    combined_stringtie_ncbi$Note <- sapply(combined_stringtie_ncbi$Note, function(x) {
+      if (is.null(x)) return(NA_character_)
+      paste(x, collapse = ",")
+    })
+
+    combined_stringtie_ncbi$gene_id[is.na(combined_stringtie_ncbi$gene_id)] <- "unknown_gene"
+    combined_stringtie_ncbi$transcript_id[is.na(combined_stringtie_ncbi$transcript_id)] <- "unknown_transcript"
+    combined_stringtie_ncbi$exon_number[is.na(combined_stringtie_ncbi$exon_number)] <- "0"
+
+    # Criar a coluna `attribute` no formato GTF
+    combined_stringtie_ncbi$attribute <- paste0(
+      'gene_id "', combined_stringtie_ncbi$gene_id, '"; ',
+      'transcript_id "', combined_stringtie_ncbi$transcript_id, '"; ',
+      'exon_number "', combined_stringtie_ncbi$exon_number, '";'
+    )
+
+    # Criar o GRanges com os campos obrigatórios
+    combined_stringtie_ncbi <- GRanges(
+      seqnames = combined_stringtie_ncbi$seqnames,
+      ranges = IRanges(start = combined_stringtie_ncbi$start, end = combined_stringtie_ncbi$end),
+      strand = combined_stringtie_ncbi$strand,
+      
+      # metadados (evite nomes reservados)
+      source = combined_stringtie_ncbi$source,
+      type = combined_stringtie_ncbi$type,
+      class_code = combined_stringtie_ncbi$class_code,
+      transcript_id = combined_stringtie_ncbi$transcript_id,
+      gene_id = combined_stringtie_ncbi$gene_id,
+      db_xref = sapply(combined_stringtie_ncbi$Dbxref, paste, collapse = ","),
+      gbkey = combined_stringtie_ncbi$gbkey,
+      locus_tag = combined_stringtie_ncbi$locus_tag,
+      partial = combined_stringtie_ncbi$partial,
+      orig_protein_id = combined_stringtie_ncbi$orig_protein_id,
+      orig_transcript_id = combined_stringtie_ncbi$orig_transcript_id,
+      product = combined_stringtie_ncbi$product,
+      transcript_biotype = combined_stringtie_ncbi$transcript_biotype,
+      exon_number = combined_stringtie_ncbi$exon_number,
+      pseudo = combined_stringtie_ncbi$pseudo,
+      PLEK = combined_stringtie_ncbi$PLEK,
+      transcript_length = combined_stringtie_ncbi$transcript_length,
+      peptide_length = combined_stringtie_ncbi$peptide_length,
+      CPC2 = combined_stringtie_ncbi$CPC2,
+      xloc = combined_stringtie_ncbi$xloc,
+      tss_id = combined_stringtie_ncbi$tss_id,
+      cmp_ref = combined_stringtie_ncbi$cmp_ref,
+      transcript_id_stringtie = NA_character_,
+      gene_id_stringtie = NA_character_,
+      score = combined_stringtie_ncbi$score,
+      phase = combined_stringtie_ncbi$phase,
+      attribute = combined_stringtie_ncbi$attribute
+    )
+
+    cat("GTF combinado criado:", nrow(combined_stringtie_ncbi), "linhas\n")
+
+    # Verificar sobreposição
+    overlap_check <- sum(gtf_lncRNA$transcript_id %in% reference$transcript_id)
+    cat("Sobreposição transcript_id (esperado 0):", overlap_check, "\n")
+
+    rtracklayer::export(combined_stringtie_ncbi, "combined_stringtie_ncbi.gtf")
+    cat("  combined_stringtie_ncbi.gtf exportado\n")
+
+    # Renomear IDs
+    cat("\nRenomeando IDs...\n")
+    combined_stringtie_ncbi_new_names <- as.data.frame(rtracklayer::import("combined_stringtie_ncbi.gtf"))
+
+    new_names <- combined_stringtie_ncbi_new_names %>%
+      dplyr::filter(type == "transcript") %>%
+      mutate(transcript_id_new = ifelse(source == "StringTie",
+                                        paste0("Pb18_lncRNA", class_code, gsub("MSTRG.", "_", transcript_id)),
+                                        transcript_id)) %>%
+      mutate(gene_id_new = ifelse(source == "StringTie",
+                                  paste0("Pb18_lncRNA", gsub("MSTRG.", "_", gene_id)),
+                                  gene_id)) %>%
+      dplyr::select(transcript_id, transcript_id_new, gene_id, gene_id_new) %>% distinct()
+
+    cat("Novos nomes criados para:", nrow(new_names), "transcritos\n")
+
+    # Aplicar novos nomes
+    combined_stringtie_ncbi_new_names <- left_join(combined_stringtie_ncbi_new_names,
+                                                  new_names %>% dplyr::select(transcript_id, transcript_id_new) %>% distinct(), 
+                                                  by = "transcript_id")
+
+    combined_stringtie_ncbi_new_names <- left_join(combined_stringtie_ncbi_new_names,
+                                                  new_names %>% dplyr::select(gene_id, gene_id_new) %>% distinct(), 
+                                                  by = "gene_id")
+
+    # Reorganizar colunas
+    combined_stringtie_ncbi_new_names <- combined_stringtie_ncbi_new_names %>%
+      dplyr::rename(transcript_id_stringtie = transcript_id) %>%
+      dplyr::rename(gene_id_stringtie = gene_id) %>%
+      dplyr::rename(transcript_id = transcript_id_new) %>%
+      dplyr::rename(gene_id = gene_id_new) %>%
+      dplyr::select(seqnames, start, end, width, strand,
+                    source, type, score, phase, gene_id,
+                    transcript_id, db_xref, gbkey, locus_tag, partial,
+                    orig_protein_id, orig_transcript_id, product, transcript_biotype, exon_number,
+                    pseudo, PLEK, transcript_length, peptide_length, CPC2,
+                    xloc, class_code, tss_id, cmp_ref,
+                    transcript_id_stringtie, gene_id_stringtie)
+
+    
+    rtracklayer::export(combined_stringtie_ncbi_new_names, "combined_stringtie_ncbi_new_names.gtf")
+    cat("  combined_stringtie_ncbi_new_names.gtf exportado\n")
+    
+    # Estatísticas finais
+    cat("\n=== ESTATÍSTICAS FINAIS ===\n")
+    cat("Total de lncRNAs identificados:", length(lncRNA), "\n")
+    
+    # Estatísticas por class_code
+    class_stats <- gtf_lncRNA %>% 
+      filter(type == "transcript") %>% 
+      count(class_code) %>% 
+      arrange(desc(n))
+    
+    cat("Distribuição por class_code:\n")
+    for(i in 1:nrow(class_stats)) {
+      cat("  ", class_stats$class_code[i], ":", class_stats$n[i], "\n")
+    }
+    
+    # Estatísticas de comprimento
+    length_stats <- gtf_lncRNA %>% 
+      filter(type == "transcript") %>% 
+      summarise(
+        mean_length = mean(transcript_length, na.rm = TRUE),
+        median_length = median(transcript_length, na.rm = TRUE),
+        min_length = min(transcript_length, na.rm = TRUE),
+        max_length = max(transcript_length, na.rm = TRUE)
+      )
+    
+    cat("Estatísticas de comprimento:\n")
+    cat("  Média:", round(length_stats$mean_length, 2), "nt\n")
+    cat("  Mediana:", length_stats$median_length, "nt\n")
+    cat("  Mínimo:", length_stats$min_length, "nt\n")
+    cat("  Máximo:", length_stats$max_length, "nt\n")
+    
+    cat("\nProcessamento concluído com sucesso!\n")
+    cat("Data/hora de término:", as.character(Sys.time()), "\n")
+    sink()
+    
+    cat('"', Sys.getenv("TASK_PROCESS", "SELECT_LNCRNAS"), '":\n', file="versions.yml", sep="")
+    cat('    r-base: "', R.version.string, '"\n', file="versions.yml", append=TRUE, sep="")
+    cat('    tidyverse: "', as.character(packageVersion("tidyverse")), '"\n', file = "versions.yml", append = TRUE, sep="")
+    cat('    rtracklayer: "', as.character(packageVersion("rtracklayer")), '"\n', file = "versions.yml", append = TRUE, sep="")
+
+    sink()
\ No newline at end of file
diff --git a/assets/lncselect.R b/assets/lncselect.R
new file mode 100644
index 0000000..2e787cd
--- /dev/null
+++ b/assets/lncselect.R
@@ -0,0 +1,174 @@
+################################################################################
+###################### Select lncRNAs (back to shell) ##########################
+################################################################################
+library(tidyverse)
+library(dplyr)
+library(stringr)
+
+    annotated_gtf <- Sys.getenv("ANNOTATED_GTF")
+    cpc2_results <- Sys.getenv("CPC2_RESULTS")
+    plek_results <- Sys.getenv("PLEK_RESULTS")
+    pfam_domtblout <- Sys.getenv("PFAM_DOMTBLOUT")
+    reference_gtf <- Sys.getenv("REFERENCE_GTF")
+    prefix <- Sys.getenv("PREFIX")
+    min_length <- as.numeric(Sys.getenv("MIN_LENGTH"))
+    pfam_evalue <- as.numeric(Sys.getenv("PFAM_EVALUE"))
+
+dup_reference_annot <- as.data.frame(rtracklayer::import(annotated_gtf))
+
+# dup_reference_annot <- dup_reference_annot %>%
+#   mutate(
+#     gene_id = ifelse(
+#       str_detect(gene_id, "gene-PADG_"),
+#       str_extract(gene_id, "gene-PADG_[0-9]+"),
+#       gene_id  # mantém MSTRG.xxx se não tiver gene-PADG
+#     )
+#   )
+
+CPC2 <- read.delim(cpc2_results)
+colnames(CPC2) <- c("transcript_id", "transcript_length", "peptide_length", "Fickett_score",
+                    "pI", "ORF_integrity", "coding_probability", "CPC2")
+
+PLEK <- read.delim(plek_results, header = F)
+PLEK <- PLEK %>% mutate(transcript_id = str_extract(V3, "(?<=\\>).*?(?= loc:)"))
+colnames(PLEK) <- c("PLEK", "value", "features", "transcript_id")
+
+pfam <- readr::read_table(pfam_domtblout, comment = "#", col_names = FALSE)
+
+
+
+# colnames(pfam) <- c(
+#  "target_name", "target_accession", "tlen",
+#  "query_name", "query_accession", "qlen",
+#  "full_seq_Evalue", "full_seq_score", "full_seq_bias",
+#  "domain_num", "domain_total",
+#  "domain_c_Evalue", "domain_i_Evalue", "domain_score", "domain_bias",
+#  "hmm_from", "hmm_to", "ali_from", "ali_to", "env_from", "env_to",
+#  "acc", "description"
+# )
+
+
+transcripts_with_domain <- as.data.frame(pfam) %>%
+  dplyr::filter(X7 <= pfam_evalue) %>%
+  mutate(transcript_id = str_remove(X1, "\\.p[0-9]+$")) %>% # Remove .p1, .p2 etc
+  distinct(transcript_id) %>% 
+  mutate(domain_present = "yes")
+
+gtf_complete <- merge(dup_reference_annot, transcripts_with_domain, by = "transcript_id", all.x = T)
+gtf_complete <- merge(gtf_complete, PLEK[,c("transcript_id", "PLEK")], by = "transcript_id", all.x = T)
+gtf_complete <- merge(gtf_complete, CPC2[,c("transcript_id", "transcript_length", "peptide_length", "CPC2")], by = "transcript_id", all.x = T)
+
+rm(pfam, CPC2, PLEK, transcripts_with_domain)
+
+lncRNA <- gtf_complete %>% 
+  dplyr::filter(class_code %in% c("x", "i", "u")) %>% 
+  dplyr::filter(transcript_length >= min_length) %>% 
+  dplyr::filter(CPC2 == "noncoding" & PLEK == "Non-coding") %>% 
+  dplyr::filter(is.na(domain_present)) %>% pull(transcript_id) %>% unique()
+
+gtf_lncRNA <- gtf_complete %>% filter(transcript_id %in% lncRNA) %>% 
+  mutate(transcript_biotype = ifelse(type == "transcript", "lncRNA", ""))
+
+
+rtracklayer::export(gtf_lncRNA, "gtf_lncRNA.gtf", format = gtf)
+rtracklayer::export(gtf_complete, "gtf_complete.gtf", format = gtf)
+
+rm(dup_reference_annot, gtf_complete, lncRNA)
+
+################################################################################
+#################### Combine reference gtf and stringtie #######################
+################################################################################
+
+## Check gene equal with different class
+lncRNAs_check <- gtf_lncRNA %>% 
+  dplyr::filter(type == "transcript") %>% 
+  dplyr::select(class_code, gene_id) %>% 
+  distinct()
+
+## 15 genes with different class
+nrow(lncRNAs_check) - length(unique(gtf_lncRNA$gene_id))
+
+## Select genes
+cut_lncRNA <- lncRNAs_check %>% dplyr::filter(duplicated(lncRNAs_check$gene_id)) %>% pull(gene_id)
+
+#gtf_lncRNA %>% dplyr::filter(gene_id %in% cut_lncRNA) %>% 
+#  dplyr::filter(type == "transcript") %>% 
+#  dplyr::select(start, end, strand, gene_id, transcript_id, class_code, xloc, transcript_length)
+
+
+
+rm(lncRNAs_check, cut_lncRNA)
+
+## Filter transcript and exon from reference
+reference <- as.data.frame(rtracklayer::import(reference_gtf))
+reference_filter <- reference %>% filter(type %in% c("transcript", "exon"))
+
+#gtf_lncRNA <- GenomicRanges::makeGRangesFromDataFrame(gtf_lncRNA, keep.extra.columns = TRUE)
+
+# reference_filter <- makeGRangesFromDataFrame(
+#   reference_filter,
+#   seqnames.field = "seqnames",
+#   start.field = "start",
+#   end.field = "end",
+#   strand.field = "strand",
+#   keep.extra.columns = TRUE
+# )
+
+combined_stringtie_ncbi <- dplyr::bind_rows(reference_filter, gtf_lncRNA)
+# combined_stringtie_ncbi <- c(reference_filter, gtf_lncRNA)
+
+
+
+sum(gtf_lncRNA$transcript_id %in% reference$transcript_id) #0 is expected
+
+# Exportar como novo GTF
+rtracklayer::export(combined_stringtie_ncbi, "combined_stringtie_ncbi.gtf", format = gtf)
+
+#rm(combined_stringtie_ncbi, gtf_lncRNA, reference, reference_filter)
+
+################################################################################
+################################# Change name ##################################
+################################################################################
+
+combined_stringtie_ncbi_new_names <- as.data.frame(combined_stringtie_ncbi)
+
+new_names <- combined_stringtie_ncbi_new_names %>% 
+  dplyr::filter(type == "transcript") %>% 
+  mutate(transcript_id_new = ifelse(source == "StringTie",
+                                    paste0("Pb18_lncRNA", class_code, gsub("MSTRG.", "_", transcript_id)), 
+                                    transcript_id)) %>% 
+  mutate(gene_id_new = ifelse(source == "StringTie",
+                              paste0("Pb18_lncRNA", gsub("MSTRG.", "_", gene_id)), 
+                              gene_id)) %>% 
+  dplyr::select(transcript_id, transcript_id_new, gene_id, gene_id_new) %>% distinct()
+
+
+combined_stringtie_ncbi_new_names <- left_join(combined_stringtie_ncbi_new_names, 
+                                               new_names %>% dplyr::select(transcript_id, transcript_id_new) %>% distinct(), by = "transcript_id")
+
+combined_stringtie_ncbi_new_names <- left_join(combined_stringtie_ncbi_new_names, 
+                                               new_names %>% dplyr::select(gene_id, gene_id_new) %>% distinct(), by = "gene_id")
+
+combined_stringtie_ncbi_new_names <- combined_stringtie_ncbi_new_names %>% 
+  dplyr::rename(transcript_id_stringtie = transcript_id) %>% 
+  dplyr::rename(gene_id_stringtie = gene_id) %>% 
+  dplyr::rename(transcript_id = transcript_id_new) %>%
+  dplyr::rename(gene_id = gene_id_new) %>% 
+  dplyr::select(seqnames,start, end,width,strand,
+                source, type,score,phase,gene_id,
+                transcript_id,db_xref,gbkey,locus_tag,partial,
+                orig_protein_id,orig_transcript_id,product, transcript_biotype,exon_number,
+                pseudo,PLEK,transcript_length,peptide_length,CPC2,
+                xloc,class_code,tss_id,gene_name,cmp_ref,
+                transcript_id_stringtie,gene_id_stringtie)
+
+combined_stringtie_ncbi_new_names %>% dplyr::select(gene_id,gene_id_stringtie,
+                                                    transcript_id, transcript_id_stringtie, class_code)
+
+rtracklayer::export(combined_stringtie_ncbi_new_names, "combined_stringtie_ncbi_new_names.gtf", format = gtf)
+
+    cat('"', Sys.getenv("TASK_PROCESS", "SELECT_LNCRNAS"), '":\n', file="versions.yml", sep="")
+    cat('    r-base: "', R.version.string, '"\n', file="versions.yml", append=TRUE, sep="")
+    cat('    tidyverse: "', as.character(packageVersion("tidyverse")), '"\n', file = "versions.yml", append = TRUE, sep="")
+    cat('    rtracklayer: "', as.character(packageVersion("rtracklayer")), '"\n', file = "versions.yml", append = TRUE, sep="")
+    
\ No newline at end of file
diff --git a/conf/modules.config b/conf/modules.config
index 48fa69e..c555822 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -40,7 +40,7 @@ process {
     }
     
     withName: 'STRINGTIE_MERGE' {
-        ext.args = '-T 1'
+        ext.args = '-T 1 -m 200'
     }  
 
     withName: 'GFFREAD' {
diff --git a/modules/local/selectlnc/main.nf b/modules/local/selectlnc/main.nf
index 9f5066a..57d26b1 100644
--- a/modules/local/selectlnc/main.nf
+++ b/modules/local/selectlnc/main.nf
@@ -11,7 +11,6 @@ process SELECT_LNCRNAS {
         'community.wave.seqera.io/library/bioconductor-rtracklayer_r-tidyverse:4e377b50350447f9' :
         'community.wave.seqera.io/library/bioconductor-rtracklayer_r-tidyverse:4e377b50350447f9' }"
     
-    
     publishDir "${params.outdir}/lncrnas", mode: 'copy'
     
     input:
@@ -20,13 +19,14 @@ process SELECT_LNCRNAS {
     tuple val(meta3), path(plek_results)
     tuple val(meta4), path(pfam_domtblout)
     path reference_gtf
+    path lncselect_script  // Adicionar o script como input
     
     output:
     tuple val(meta), path("gtf_lncRNA.gtf"), emit: lncrna_gtf
     tuple val(meta), path("gtf_complete.gtf"), emit: complete_gtf
     tuple val(meta), path("combined_stringtie_ncbi.gtf"), emit: combined_gtf
     tuple val(meta), path("combined_stringtie_ncbi_new_names.gtf"), emit: final_gtf
-    tuple val(meta), path("*.log"), emit: log
+    //tuple val(meta), path("*.log"), emit: log
     path "versions.yml", emit: versions
     
     when:
@@ -39,295 +39,19 @@ process SELECT_LNCRNAS {
     def pfam_evalue = task.ext.pfam_evalue ?: '1e-10'
     
     """
-    #!/usr/bin/env Rscript
-    
-    # Carregar bibliotecas necessárias
-    library(tidyverse)
-    library(rtracklayer)
-    
-    # Configurar log
-    log_file <- "${prefix}.log"
-    sink(log_file, append = TRUE, split = TRUE)
-    
-    cat("=== SELEÇÃO DE lncRNAs ===\\n")
-    cat("Iniciando seleção de lncRNAs e combinação com anotação de referência\\n")
-    cat("Data/hora de início:", as.character(Sys.time()), "\\n")
-    cat("\\n")
-    
-    # Ler arquivos de entrada
-    cat("Lendo arquivos de entrada...\\n")
-    cat("  GTF anotado: ${annotated_gtf}\\n")
-    cat("  Resultados CPC2: ${cpc2_results}\\n")
-    cat("  Resultados PLEK: ${plek_results}\\n")
-    cat("  Domínios Pfam: ${pfam_domtblout}\\n")
-    cat("  GTF de referência: ${reference_gtf}\\n")
-    cat("\\n")
-    
-    # Carregar GTF anotado
-    dup_reference_annot <- as.data.frame(rtracklayer::import("${annotated_gtf}"))
-    cat("GTF anotado carregado:", nrow(dup_reference_annot), "linhas\\n")
-    
-    # Carregar resultados CPC2
-    CPC2 <- read.delim("${cpc2_results}")
-    colnames(CPC2) <- c("transcript_id", "transcript_length", "peptide_length", "Fickett_score",
-                        "pI", "ORF_integrity", "coding_probability", "CPC2")
-    cat("Resultados CPC2 carregados:", nrow(CPC2), "transcritos\\n")
-    cat("  Codificantes:", sum(CPC2\$CPC2 == "coding"), "\\n")
-    cat("  Não-codificantes:", sum(CPC2\$CPC2 == "noncoding"), "\\n")
-    
-    # Carregar resultados PLEK
-    PLEK <- read.delim("${plek_results}", header = FALSE)
-    PLEK <- PLEK %>% mutate(transcript_id = str_extract(V3, "(?<=\\\\>).*?(?= loc:)"))
-    colnames(PLEK) <- c("PLEK", "value", "features", "transcript_id")
-    cat("Resultados PLEK carregados:", nrow(PLEK), "transcritos\\n")
-    cat("  Codificantes:", sum(PLEK\$PLEK == "Coding"), "\\n")
-    cat("  Não-codificantes:", sum(PLEK\$PLEK == "Non-coding"), "\\n")
-    
-    # Carregar domínios Pfam
-    pfam <- readr::read_table("${pfam_domtblout}", comment = "#", col_names = FALSE)
-    cat("Domínios Pfam carregados:", nrow(pfam), "hits\\n")
-    
-    # Identificar transcritos com domínios significativos
-    transcripts_with_domain <- as.data.frame(pfam) %>%
-      dplyr::filter(X7 <= ${pfam_evalue}) %>%
-      mutate(transcript_id = str_remove(X1, "\\\\.p[0-9]+\$")) %>% # Remove .p1, .p2 etc
-      distinct(transcript_id) %>%
-      mutate(domain_present = "yes")
-    
-    cat("Transcritos com domínios Pfam significativos (E-value <= ${pfam_evalue}):", nrow(transcripts_with_domain), "\\n")
-    
-    # Combinar todos os dados
-    cat("\\nCombinando dados...\\n")
-    gtf_complete <- merge(dup_reference_annot, transcripts_with_domain, by = "transcript_id", all.x = TRUE)
-    gtf_complete <- merge(gtf_complete, PLEK[,c("transcript_id", "PLEK")], by = "transcript_id", all.x = TRUE)
-    gtf_complete <- merge(gtf_complete, CPC2[,c("transcript_id", "transcript_length", "peptide_length", "CPC2")], by = "transcript_id", all.x = TRUE)
-    
-    cat("GTF completo criado:", nrow(gtf_complete), "linhas\\n")
-    
-    # Aplicar filtros para seleção de lncRNAs
-    cat("\\nAplicando filtros para seleção de lncRNAs...\\n")
-    cat("Critérios:\\n")
-    cat("  - class_code em: x, i, u\\n")
-    cat("  - Comprimento >= ${min_length} nt\\n")
-    cat("  - PLEK == 'Non-coding' E CPC2 == 'noncoding'\\n")
-    cat("  - Sem domínios Pfam significativos\\n")
-    cat("\\n")
-    
-    # Contar transcritos em cada etapa do filtro
-    step1 <- gtf_complete %>% dplyr::filter(class_code %in% c("x", "i", "u"))
-    cat("Após filtro class_code:", length(unique(step1\$transcript_id)), "transcritos únicos\\n")
-    
-    step2 <- step1 %>% dplyr::filter(transcript_length >= ${min_length})
-    cat("Após filtro comprimento:", length(unique(step2\$transcript_id)), "transcritos únicos\\n")
-    
-    step3 <- step2 %>% dplyr::filter(PLEK == "Non-coding" & CPC2 == "noncoding")
-    cat("Após filtros CPC2 e PLEK:", length(unique(step3\$transcript_id)), "transcritos únicos\\n")
-    
-    step4 <- step3 %>% dplyr::filter(is.na(domain_present))
-    cat("Após filtro domínios Pfam:", length(unique(step4\$transcript_id)), "transcritos únicos\\n")
-    
-    # Selecionar lncRNAs finais
-    lncRNA <- step4 %>% pull(transcript_id) %>% unique()
-    cat("\\nlncRNAs selecionados:", length(lncRNA), "transcritos\\n")
-    
-    # Criar GTF de lncRNAs
-    gtf_lncRNA <- gtf_complete %>% 
-      filter(transcript_id %in% lncRNA) %>%
-      mutate(transcript_biotype = ifelse(type == "transcript", "lncRNA", ""))
-    
-    cat("GTF lncRNA criado:", nrow(gtf_lncRNA), "linhas\\n")
-    
-    # Verificar genes com diferentes class_codes
-    cat("\\nVerificando genes com diferentes class_codes...\\n")
-    lncRNAs_check <- gtf_lncRNA %>%
-      dplyr::filter(type == "transcript") %>%
-      dplyr::select(class_code, gene_id) %>%
-      distinct()
-    
-    duplicated_genes <- nrow(lncRNAs_check) - length(unique(gtf_lncRNA\$gene_id))
-    cat("Genes com diferentes class_codes:", duplicated_genes, "\\n")
-    
-    # Exportar GTFs intermediários
-    cat("\\nExportando GTFs...\\n")
-    rtracklayer::export(gtf_lncRNA, "gtf_lncRNA.gtf")
-    rtracklayer::export(gtf_complete, "gtf_complete.gtf")
-    cat("  gtf_lncRNA.gtf exportado\\n")
-    cat("  gtf_complete.gtf exportado\\n")
-    
-    # Combinar com anotação de referência
-    cat("\\nCombinando com anotação de referência...\\n")
-    reference <- as.data.frame(rtracklayer::import("${reference_gtf}"))
-    reference_filter <- reference %>% filter(type %in% c("transcript", "exon"))
-    cat("Referência carregada:", nrow(reference_filter), "linhas (transcript + exon)\\n")
-    
-    combined_stringtie_ncbi <- dplyr::bind_rows(reference_filter, gtf_lncRNA)
-
-    combined_stringtie_ncbi\$Dbxref <- sapply(combined_stringtie_ncbi\$Dbxref, function(x) {
-      if (is.null(x)) return(NA_character_)
-      paste(x, collapse = ",")
-    })
-
-    combined_stringtie_ncbi\$end_range <- sapply(combined_stringtie_ncbi\$end_range, function(x) {
-      if (is.null(x)) return(NA_character_)
-      paste(x, collapse = ",")
-    })
-
-    combined_stringtie_ncbi\$start_range <- sapply(combined_stringtie_ncbi\$start_range, function(x) {
-      if (is.null(x)) return(NA_character_)
-      paste(x, collapse = ",")
-    })
-
-    combined_stringtie_ncbi\$Parent <- sapply(combined_stringtie_ncbi\$Parent, function(x) {
-      if (is.null(x)) return(NA_character_)
-      paste(x, collapse = ",")
-    })
-
-    combined_stringtie_ncbi\$Note <- sapply(combined_stringtie_ncbi\$Note, function(x) {
-      if (is.null(x)) return(NA_character_)
-      paste(x, collapse = ",")
-    })
-
-    combined_stringtie_ncbi\$gene_id[is.na(combined_stringtie_ncbi\$gene_id)] <- "unknown_gene"
-    combined_stringtie_ncbi\$transcript_id[is.na(combined_stringtie_ncbi\$transcript_id)] <- "unknown_transcript"
-    combined_stringtie_ncbi\$exon_number[is.na(combined_stringtie_ncbi\$exon_number)] <- "0"
-
-    # Criar a coluna `attribute` no formato GTF
-    combined_stringtie_ncbi\$attribute <- paste0(
-      'gene_id "', combined_stringtie_ncbi\$gene_id, '"; ',
-      'transcript_id "', combined_stringtie_ncbi\$transcript_id, '"; ',
-      'exon_number "', combined_stringtie_ncbi\$exon_number, '";'
-    )
-
-    # Criar o GRanges com os campos obrigatórios
-    combined_stringtie_ncbi <- GRanges(
-      seqnames = combined_stringtie_ncbi\$seqnames,
-      ranges = IRanges(start = combined_stringtie_ncbi\$start, end = combined_stringtie_ncbi\$end),
-      strand = combined_stringtie_ncbi\$strand,
-      
-      # metadados (evite nomes reservados)
-      source = combined_stringtie_ncbi\$source,
-      type = combined_stringtie_ncbi\$type,
-      class_code = combined_stringtie_ncbi\$class_code,
-      transcript_id = combined_stringtie_ncbi\$transcript_id,
-      gene_id = combined_stringtie_ncbi\$gene_id,
-      db_xref = sapply(combined_stringtie_ncbi\$Dbxref, paste, collapse = ","),
-      gbkey = combined_stringtie_ncbi\$gbkey,
-      locus_tag = combined_stringtie_ncbi\$locus_tag,
-      partial = combined_stringtie_ncbi\$partial,
-      orig_protein_id = combined_stringtie_ncbi\$orig_protein_id,
-      orig_transcript_id = combined_stringtie_ncbi\$orig_transcript_id,
-      product = combined_stringtie_ncbi\$product,
-      transcript_biotype = combined_stringtie_ncbi\$transcript_biotype,
-      exon_number = combined_stringtie_ncbi\$exon_number,
-      pseudo = combined_stringtie_ncbi\$pseudo,
-      PLEK = combined_stringtie_ncbi\$PLEK,
-      transcript_length = combined_stringtie_ncbi\$transcript_length,
-      peptide_length = combined_stringtie_ncbi\$peptide_length,
-      CPC2 = combined_stringtie_ncbi\$CPC2,
-      xloc = combined_stringtie_ncbi\$xloc,
-      tss_id = combined_stringtie_ncbi\$tss_id,
-      cmp_ref = combined_stringtie_ncbi\$cmp_ref,
-      transcript_id_stringtie = NA_character_,
-      gene_id_stringtie = NA_character_,
-      score = combined_stringtie_ncbi\$score,
-      phase = combined_stringtie_ncbi\$phase,
-      attribute = combined_stringtie_ncbi\$attribute
-    )
-
-    cat("GTF combinado criado:", nrow(combined_stringtie_ncbi), "linhas\n")
-
-    # Verificar sobreposição
-    overlap_check <- sum(gtf_lncRNA\$transcript_id %in% reference\$transcript_id)
-    cat("Sobreposição transcript_id (esperado 0):", overlap_check, "\n")
-
-    rtracklayer::export(combined_stringtie_ncbi, "combined_stringtie_ncbi.gtf")
-    cat("  combined_stringtie_ncbi.gtf exportado\n")
-
-    # Renomear IDs
-    cat("\nRenomeando IDs...\n")
-    combined_stringtie_ncbi_new_names <- as.data.frame(rtracklayer::import("combined_stringtie_ncbi.gtf"))
-
-    new_names <- combined_stringtie_ncbi_new_names %>%
-      dplyr::filter(type == "transcript") %>%
-      mutate(transcript_id_new = ifelse(source == "StringTie",
-                                        paste0("Pb18_lncRNA", class_code, gsub("MSTRG.", "_", transcript_id)),
-                                        transcript_id)) %>%
-      mutate(gene_id_new = ifelse(source == "StringTie",
-                                  paste0("Pb18_lncRNA", gsub("MSTRG.", "_", gene_id)),
-                                  gene_id)) %>%
-      dplyr::select(transcript_id, transcript_id_new, gene_id, gene_id_new) %>% distinct()
-
-    cat("Novos nomes criados para:", nrow(new_names), "transcritos\n")
-
-    # Aplicar novos nomes
-    combined_stringtie_ncbi_new_names <- left_join(combined_stringtie_ncbi_new_names,
-                                                  new_names %>% dplyr::select(transcript_id, transcript_id_new) %>% distinct(), 
-                                                  by = "transcript_id")
-
-    combined_stringtie_ncbi_new_names <- left_join(combined_stringtie_ncbi_new_names,
-                                                  new_names %>% dplyr::select(gene_id, gene_id_new) %>% distinct(), 
-                                                  by = "gene_id")
-
-    # Reorganizar colunas
-    combined_stringtie_ncbi_new_names <- combined_stringtie_ncbi_new_names %>%
-      dplyr::rename(transcript_id_stringtie = transcript_id) %>%
-      dplyr::rename(gene_id_stringtie = gene_id) %>%
-      dplyr::rename(transcript_id = transcript_id_new) %>%
-      dplyr::rename(gene_id = gene_id_new) %>%
-      dplyr::select(seqnames, start, end, width, strand,
-                    source, type, score, phase, gene_id,
-                    transcript_id, db_xref, gbkey, locus_tag, partial,
-                    orig_protein_id, orig_transcript_id, product, transcript_biotype, exon_number,
-                    pseudo, PLEK, transcript_length, peptide_length, CPC2,
-                    xloc, class_code, tss_id, cmp_ref,
-                    transcript_id_stringtie, gene_id_stringtie)
-
-    
-    rtracklayer::export(combined_stringtie_ncbi_new_names, "combined_stringtie_ncbi_new_names.gtf")
-    cat("  combined_stringtie_ncbi_new_names.gtf exportado\\n")
-    
-    # Estatísticas finais
-    cat("\\n=== ESTATÍSTICAS FINAIS ===\\n")
-    cat("Total de lncRNAs identificados:", length(lncRNA), "\\n")
-    
-    # Estatísticas por class_code
-    class_stats <- gtf_lncRNA %>% 
-      filter(type == "transcript") %>% 
-      count(class_code) %>% 
-      arrange(desc(n))
-    
-    cat("Distribuição por class_code:\\n")
-    for(i in 1:nrow(class_stats)) {
-      cat("  ", class_stats\$class_code[i], ":", class_stats\$n[i], "\\n")
-    }
-    
-    # Estatísticas de comprimento
-    length_stats <- gtf_lncRNA %>% 
-      filter(type == "transcript") %>% 
-      summarise(
-        mean_length = mean(transcript_length, na.rm = TRUE),
-        median_length = median(transcript_length, na.rm = TRUE),
-        min_length = min(transcript_length, na.rm = TRUE),
-        max_length = max(transcript_length, na.rm = TRUE)
-      )
-    
-    cat("Estatísticas de comprimento:\\n")
-    cat("  Média:", round(length_stats\$mean_length, 2), "nt\\n")
-    cat("  Mediana:", length_stats\$median_length, "nt\\n")
-    cat("  Mínimo:", length_stats\$min_length, "nt\\n")
-    cat("  Máximo:", length_stats\$max_length, "nt\\n")
-    
-    cat("\\nProcessamento concluído com sucesso!\\n")
-    cat("Data/hora de término:", as.character(Sys.time()), "\\n")
-    sink()
-    
-    # Criar arquivo de versões
-    cat('"NFCORE_NOTTOCODE:NOTTOCODE:SELECT_LNCRNAS":\n', file="versions.yml")
-    cat('    r-base: "', R.version.string, '"\n', file="versions.yml", append=TRUE)
-    cat('    tidyverse: "', as.character(packageVersion("tidyverse")), '"\n', file = "versions.yml", append = TRUE)
-    cat('    rtracklayer: "', as.character(packageVersion("rtracklayer")), '"\n', file = "versions.yml", append = TRUE)
+    # Executar o script R com as variáveis de ambiente
+    export ANNOTATED_GTF="${annotated_gtf}"
+    export CPC2_RESULTS="${cpc2_results}"
+    export PLEK_RESULTS="${plek_results}"
+    export PFAM_DOMTBLOUT="${pfam_domtblout}"
+    export REFERENCE_GTF="${reference_gtf}"
+    export PREFIX="${prefix}"
+    export MIN_LENGTH="${min_length}"
+    export PFAM_EVALUE="${pfam_evalue}"
+    
+    Rscript ${lncselect_script}
     """
-    
+
     stub:
     def prefix = task.ext.prefix ?: "${meta.id}"
     """
diff --git a/nextflow.config b/nextflow.config
index bacb242..75198a1 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -51,6 +51,7 @@ params {
     mstrg_prep_script          = "${projectDir}/assets/mstrg_prep.pl"
     plek                       = "${projectDir}/assets/PLEK.1.2.tar.xz"
     cpc2                       = "${projectDir}/assets/CPC2_standalone"
+    lncselect                  = "${projectDir}/assets/lncselect.R"
 
     
     // Pipeline specific options
diff --git a/workflows/nottocode.nf b/workflows/nottocode.nf
index 3bca03a..ac9f27b 100644
--- a/workflows/nottocode.nf
+++ b/workflows/nottocode.nf
@@ -50,8 +50,12 @@ workflow NOTTOCODE {
         error "ERROR: missing --reference_genome fasta"
     }
 
+    if (!params.reference_gff) {
+        error "ERROR: missing --reference_gff annotation file"
+    }
+
     if (!params.reference_gtf) {
-        error "ERROR: missing --reference_gtf annotation file"
+    error "ERROR: missing --reference_gtf annotation file"
     }
 
     if (!params.pfam_db) {
@@ -101,6 +105,7 @@ workflow NOTTOCODE {
     // Refence files
     //
     ch_genome_fasta = file(params.reference_genome)
+    ch_reference_gff = file(params.reference_gff)
     ch_reference_gtf = file(params.reference_gtf)
 
     //
@@ -142,7 +147,7 @@ workflow NOTTOCODE {
     
     STRINGTIE_MERGE (
         ch_gtf_files,    
-        ch_reference_gtf    
+        ch_reference_gff    
     )
     ch_versions = ch_versions.mix(STRINGTIE_MERGE.out.versions)
 
@@ -162,7 +167,7 @@ workflow NOTTOCODE {
 
     GFFCOMPARE (
         ch_merged_gtf,
-        ch_reference_gtf
+        ch_reference_gff
     )
     ch_versions = ch_versions.mix(GFFCOMPARE.out.versions)
 
@@ -282,12 +287,16 @@ workflow NOTTOCODE {
     )
     ch_versions = ch_versions.mix(HMMER_HMMSEARCH.out.versions)
 
+
+
+    lncselect_script_ch = Channel.fromPath(params.lncselect)
     SELECT_LNCRNAS (
-        MSTRG_PREP.out.gtf_prep,
+        GFFCOMPARE.out.annotated_gtf,
         CPC2.out.cpc2_results,
         PLEK.out.plek_results,
         HMMER_HMMSEARCH.out.domain_summary,
-        ch_reference_gtf
+        ch_reference_gtf,
+        lncselect_script_ch
     )
      ch_versions = ch_versions.mix(SELECT_LNCRNAS.out.versions)