8  My Document

9 Post-processing/QC

9.1 Libraries

9.2 Data Format

The examples below require a tidy (‘long-format’) table with the following columns: DataFileName, Feature and Area. See chapters xxxx on how to prepare data in this format. For this chapter we use the example data set loaded below.

# Read the example data set (one row per data file, one column per feature).
mydataset_orig <- read_csv(
  file = here("data/Testdata_Lipidomics_flat_wide_V3.csv")
)

# Reshape to long format: every column except DataFileName becomes one
# (Feature, Area) pair per row.
mydataset <- pivot_longer(
  data = mydataset_orig,
  cols = !DataFileName,
  names_to = "Feature",
  values_to = "Area"
)

Alternatively, to import a MassHunter CSV file, you can use read_MassHunterCSV from the SLINGtools package (https://slinghub.github.io/SLINGtools):

# Alternative import route: this assignment overwrites `mydataset` with the
# table returned by SLINGtools::read_MassHunterCSV().
# NOTE(review): presumably the result already has the DataFileName / Feature /
# Area columns used below -- confirm against the SLINGtools documentation.
mydataset <- SLINGtools::read_MassHunterCSV(
  file = here("data/Testdata_MHQuant_Detailed_V3.csv")
)

9.3 Annotate

# Annotate every measurement with run metadata parsed from the data file name.
#
# DataFileName is split at "_" into RunID, QCtype and SampleName; the original
# column is kept (remove = FALSE) and convert = TRUE asks separate() to
# type-convert the new columns (e.g. a purely numeric RunID).
d <- mydataset |>
  select(DataFileName, Lipid = Feature, Area) |>
  separate(
    col = DataFileName,
    into = c("RunID", "QCtype", "SampleName"),
    sep = "_",
    convert = TRUE,
    remove = FALSE
  ) |>
  # Strip the trailing ".d" extension. The dot must be escaped and the pattern
  # anchored: an unescaped ".d" is a regex meaning "any character followed by
  # d", which would turn e.g. "Ped01.d" into "P01.d" and leave the extension.
  mutate(SampleName = str_replace(SampleName, "\\.d$", ""))

9.4 RunScatter

This produces a single page, i.e. one plot whose panels are arranged in n rows and m columns.

# Colour used for each QC type in the run-scatter plot.
qc_colors <- c(
  "SPL" = "grey50",
  "TQC" = "blue",
  "BQC" = "red",
  "RQC" = "grey80",
  "PBLK" = "green",
  "NA" = "black"
)

# Keep only features whose name contains the literal text "ISTD" and only the
# QC types of interest. QCtype is turned into a factor with a fixed level
# order, and rows are sorted by that order.
d_plot <- d |>
  filter(
    str_detect(Lipid, fixed("ISTD")),
    QCtype %in% c("SPL", "RQC", "TQC", "BQC", "PBLK")
  ) |>
  mutate(
    QCtype = factor(QCtype, levels = c("SPL", "RQC", "TQC", "BQC", "PBLK"))
  ) |>
  arrange(QCtype)

# One panel per lipid, each with its own y axis; Area plotted against RunID.
ggplot(d_plot, aes(x = RunID, y = Area, color = QCtype)) +
  geom_point(size = 1.5) +
  scale_color_manual(values = qc_colors) +
  facet_wrap(vars(Lipid), scales = "free_y") +
  theme_light(base_size = 8)
#> Warning: Removed 3 rows containing missing values (geom_point).

# QC type codes; the three lookup vectors below are named by these codes, in
# this order.
QCtype_levels_temp <- c(
  "SBLK", "TBLK", "UBLK", "BQC", "TQC", "RQC", "EQC",
  "NIST", "LTR", "PBLK", "SPL", "SST", "MBLK"
)

# Outline colour per QC type.
QCtype_col <- setNames(
  c(
    "#1854f9", "#db0202", "#de21de", "#db0202", "#1854f9", "#96a4ff", "#513c3c",
    "#002e6b", "#880391", "#08c105", "#aaaeaf", "#bafc03", "black"
  ),
  QCtype_levels_temp
)

# Point symbol per QC type (R plotting-symbol codes).
QCtype_shape <- setNames(
  c(23, 23, 23, 16, 25, 6, 24, 23, 23, 23, 22, 10, 10),
  QCtype_levels_temp
)

# Fill colour per QC type; the string "NA" is read by R as a transparent
# colour, so those symbols are drawn without fill.
QCtype_fillcol <- setNames(
  c(
    "#f891ff", "#fffb03", "#c1bd04", "#db0202", "#1854f9", "#688ff9", "NA",
    "#cce2ff", "#880391", "#08c105", "NA", "#aaaeaf", "black"
  ),
  QCtype_levels_temp
)

# Same run scatter as before, now encoding QC type by colour, fill and shape,
# laid out in three columns with every y axis forced to include zero.
ggplot(d_plot, aes(x = RunID, y = Area)) +
  geom_point(
    mapping = aes(color = QCtype, fill = QCtype, shape = QCtype),
    size = 1.5
  ) +
  facet_wrap(vars(Lipid), scales = "free_y", ncol = 3) +
  expand_limits(y = 0) +
  scale_color_manual(values = QCtype_col, drop = TRUE) +
  scale_fill_manual(values = QCtype_fillcol, drop = TRUE) +
  scale_shape_manual(values = QCtype_shape, drop = TRUE) +
  theme_light(base_size = 8)
#> Warning: Removed 3 rows containing missing values (geom_point).