12  Regressions for many lipids

12.1 Libraries

12.2 Overview of lm() and the ‘broom’ package

# Small example data set: six InjVol values, each with one Response value
d_test <- tibble(
  InjVol = c(0, 0.2, 0.4, 0.6, 0.8, 1),
  Response = c(12, 23, 34, 44, 89, 101)
)

# Print the table to check its contents
d_test
#> # A tibble: 6 × 2
#>   InjVol Response
#>    <dbl>    <dbl>
#> 1    0         12
#> 2    0.2       23
#> 3    0.4       34
#> 4    0.6       44
#> 5    0.8       89
#> 6    1        101
# Fit a simple linear regression of Response on InjVol
model <- lm(Response ~ InjVol, data = d_test)

# Print the full model summary (residuals, coefficients, fit statistics)
summary(model)
#> 
#> Call:
#> lm(formula = Response ~ InjVol, data = d_test)
#> 
#> Residuals:
#>        1        2        3        4        5        6 
#>   8.1429   0.4857  -7.1714 -15.8286  10.5143   3.8571 
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)   
#> (Intercept)    3.857      8.043   0.480  0.65656   
#> InjVol        93.286     13.282   7.024  0.00216 **
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 11.11 on 4 degrees of freedom
#> Multiple R-squared:  0.925,  Adjusted R-squared:  0.9062 
#> F-statistic: 49.33 on 1 and 4 DF,  p-value: 0.002165
# Extract only the coefficient of determination (R^2) from the model summary
summary(model)[["r.squared"]]
#> [1] 0.9249954
# broom::glance() condenses the model-level statistics (R^2, sigma, AIC, ...)
# into a one-row tibble
model |> broom::glance()
#> # A tibble: 1 × 12
#>   r.squared adj.r.squared sigma statistic p.value    df logLik   AIC   BIC
#>       <dbl>         <dbl> <dbl>     <dbl>   <dbl> <dbl>  <dbl> <dbl> <dbl>
#> 1     0.925         0.906  11.1      49.3 0.00216     1  -21.7  49.5  48.9
#> # … with 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
# broom::tidy() returns one row per model term with its estimate, standard
# error, test statistic and p-value
model |> broom::tidy()
#> # A tibble: 2 × 5
#>   term        estimate std.error statistic p.value
#>   <chr>          <dbl>     <dbl>     <dbl>   <dbl>
#> 1 (Intercept)     3.86      8.04     0.480 0.657  
#> 2 InjVol         93.3      13.3      7.02  0.00216

12.3 Import Datasets

# Read the annotated, wide-format lipidomics test data set; the file path is
# resolved through here()
d_orig <- here("data/Testdata_Lipidomics_flat_wide_annotated_V1.csv") |>
  read_csv()

12.4 Prepare Data

# Convert to long format: every column outside the DataFileName..InjVol range
# is stacked into Lipid (former column name) / Area (value) pairs.
# `!(a:b)` is the documented tidyselect way to select the complement of a
# column range; it selects the same columns as the older `-a:-b` spelling.
d_long <- d_orig |>
  pivot_longer(
    cols = !(DataFileName:InjVol),
    names_to = "Lipid",
    values_to = "Area"
  )

# Get a table with RQCs only and sort by Lipid
d_rqc <- d_long |>
  filter(QCtype == "RQC") |>
  arrange(Lipid)

12.5 Run regression for each lipid

In this example a linear regression of Area on InjVol is fitted separately for each lipid using lm(). The model-level statistics of each fit are converted to a one-row tidy table using the broom::glance() function.

# Regression formula applied to every lipid: Area as a function of InjVol.
# NOTE: this reuses the name `model`, replacing the lm fit created earlier.
model <- Area ~ InjVol

# Fit one linear model per lipid and collect the model-level statistics
# returned by broom::glance() (one row per lipid).
d_res <- d_rqc |>
  group_by(Lipid) |>
  nest() |>
  # Drop the grouping so the result is a plain tibble and later verbs do not
  # silently operate per lipid
  ungroup() |>
  mutate(
    models = map(data, \(x) lm(model, data = x)),
    #mandel = map(data, \(x) DCVtestkit::calculate_mandel(x, "InjVol", "Area")),
    #ppa = map(data, \(x) DCVtestkit::calculate_pra_linear(x, "InjVol", "Area")),
    glance = map(models, broom::glance)
  ) |>
  unnest(glance) |>
  # Keep only Lipid and the statistics; drop the nested data and model objects
  dplyr::select(-data, -models)

# Workaround: DCVtestkit::calculate_pra_linear currently returns a list instead of a vector
# d_res$ppa <- unlist(d_res$ppa)

The results contain the model-level summary statistics (e.g. R², residual standard error, F statistic, P value, AIC and BIC) of the fitted regression for each lipid species, one row per lipid.

Lipid r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs
CE 16:0 0.626274 0.602917 44054.0359 26.81215 0.000092 1 -216.9579 439.9159 442.5870 3.105213e+10 16 18
CE 16:1 0.834197 0.823834 8622.1216 80.50006 0.000000 1 -187.5984 381.1968 383.8679 1.189456e+09 16 18
CE 16:2 0.609873 0.585490 585.7358 25.01226 0.000131 1 -139.1925 284.3850 287.0561 5.489383e+06 16 18
CE 17:0 0.900585 0.894371 1178.0217 144.94082 0.000000 1 -151.7695 309.5390 312.2101 2.220376e+07 16 18
CE 17:1 0.945373 0.941959 1914.1239 276.89543 0.000000 1 -160.5071 327.0142 329.6854 5.862192e+07 16 18
CE 18:0 0.952998 0.950061 8748.1092 324.41314 0.000000 1 -187.8595 381.7190 384.3902 1.224471e+09 16 18
CE 18:1 0.944273 0.940791 522750.3432 271.11631 0.000000 1 -261.4843 528.9686 531.6397 4.372287e+12 16 18
CE 18:1 d7 (ISTD) 0.922337 0.917483 60313.2929 190.01887 0.000000 1 -222.6124 451.2248 453.8959 5.820309e+10 16 18
CE 18:2 0.950589 0.947501 2583852.0035 307.81495 0.000000 1 -290.2471 586.4942 589.1653 1.068207e+14 16 18
CE 18:3 0.767452 0.752918 88635.9222 52.80302 0.000002 1 -229.5421 465.0842 467.7553 1.257012e+11 16 18