Might as well load these packages; they are always useful.

library(tidyverse)
library(tidyselect)
library(janitor)
library(lubridate)

Converting

Convert all character variables to numeric

# Coerce every character column to numeric; all other columns are left as-is.
# mtcars has no character columns, so the printed result is unchanged here.
# Base is.character() is used because the is_character() re-export attached
# by the tidyverse is deprecated in recent purrr releases.
mtcars %>%
  mutate(across(where(is.character), as.numeric))
# A tibble: 32 × 11
     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1  21       6  160    110  3.9   2.62  16.5     0     1     4     4
 2  21       6  160    110  3.9   2.88  17.0     0     1     4     4
 3  22.8     4  108     93  3.85  2.32  18.6     1     1     4     1
 4  21.4     6  258    110  3.08  3.22  19.4     1     0     3     1
 5  18.7     8  360    175  3.15  3.44  17.0     0     0     3     2
 6  18.1     6  225    105  2.76  3.46  20.2     1     0     3     1
 7  14.3     8  360    245  3.21  3.57  15.8     0     0     3     4
 8  24.4     4  147.    62  3.69  3.19  20       1     0     4     2
 9  22.8     4  141.    95  3.92  3.15  22.9     1     0     4     2
10  19.2     6  168.   123  3.92  3.44  18.3     1     0     4     4
# … with 22 more rows

Convert all numeric variables to factor variables

# Turn every numeric column into a factor.
mtcars %>%
  mutate(across(.cols = where(is.numeric), .fns = ~ as_factor(.x)))
# A tibble: 32 × 11
   mpg   cyl   disp  hp    drat  wt    qsec  vs    am    gear  carb 
   <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct>
 1 21    6     160   110   3.9   2.62  16.46 0     1     4     4    
 2 21    6     160   110   3.9   2.875 17.02 0     1     4     4    
 3 22.8  4     108   93    3.85  2.32  18.61 1     1     4     1    
 4 21.4  6     258   110   3.08  3.215 19.44 1     0     3     1    
 5 18.7  8     360   175   3.15  3.44  17.02 0     0     3     2    
 6 18.1  6     225   105   2.76  3.46  20.22 1     0     3     1    
 7 14.3  8     360   245   3.21  3.57  15.84 0     0     3     4    
 8 24.4  4     146.7 62    3.69  3.19  20    1     0     4     2    
 9 22.8  4     140.8 95    3.92  3.15  22.9  1     0     4     2    
10 19.2  6     167.6 123   3.92  3.44  18.3  1     0     4     4    
# … with 22 more rows

Convert all character variables to factor variables

# Turn every character column into a factor; list-columns and numeric
# columns are not matched by the predicate and stay as they are.
# Base is.character() is used because the is_character() re-export attached
# by the tidyverse is deprecated in recent purrr releases.
starwars %>%
  mutate(across(where(is.character), as_factor))
# A tibble: 87 × 14
   name     height  mass hair_color skin_color eye_color birth_year sex   gender
   <fct>     <int> <dbl> <fct>      <fct>      <fct>          <dbl> <fct> <fct> 
 1 Luke Sk…    172    77 blond      fair       blue            19   male  mascu…
 2 C-3PO       167    75 <NA>       gold       yellow         112   none  mascu…
 3 R2-D2        96    32 <NA>       white, bl… red             33   none  mascu…
 4 Darth V…    202   136 none       white      yellow          41.9 male  mascu…
 5 Leia Or…    150    49 brown      light      brown           19   fema… femin…
 6 Owen La…    178   120 brown, gr… light      blue            52   male  mascu…
 7 Beru Wh…    165    75 brown      light      blue            47   fema… femin…
 8 R5-D4        97    32 <NA>       white, red red             NA   none  mascu…
 9 Biggs D…    183    84 black      light      brown           24   male  mascu…
10 Obi-Wan…    182    77 auburn, w… fair       blue-gray       57   male  mascu…
# … with 77 more rows, and 5 more variables: homeworld <fct>, species <fct>,
#   films <list>, vehicles <list>, starships <list>

Convert some (but not all) character variables to factor variables

# Name the columns explicitly to convert only a chosen subset to factors.
starwars %>%
  mutate(across(.cols = c(hair_color, skin_color), .fns = as_factor))
# A tibble: 87 × 14
   name     height  mass hair_color skin_color eye_color birth_year sex   gender
   <chr>     <int> <dbl> <fct>      <fct>      <chr>          <dbl> <chr> <chr> 
 1 Luke Sk…    172    77 blond      fair       blue            19   male  mascu…
 2 C-3PO       167    75 <NA>       gold       yellow         112   none  mascu…
 3 R2-D2        96    32 <NA>       white, bl… red             33   none  mascu…
 4 Darth V…    202   136 none       white      yellow          41.9 male  mascu…
 5 Leia Or…    150    49 brown      light      brown           19   fema… femin…
 6 Owen La…    178   120 brown, gr… light      blue            52   male  mascu…
 7 Beru Wh…    165    75 brown      light      blue            47   fema… femin…
 8 R5-D4        97    32 <NA>       white, red red             NA   none  mascu…
 9 Biggs D…    183    84 black      light      brown           24   male  mascu…
10 Obi-Wan…    182    77 auburn, w… fair       blue-gray       57   male  mascu…
# … with 77 more rows, and 5 more variables: homeworld <chr>, species <chr>,
#   films <list>, vehicles <list>, starships <list>

Convert one or more variables to date-time variables

# Keep only the column(s) to convert, then parse each one as a date-time.
# Add further column names inside c() to convert several variables at once.
# NOTE(review): ymd_hms() parses into UTC unless tz is given; confirm that
# UTC is the intended time zone for these values.
nycflights13::flights %>%
  select(time_hour) %>%
  mutate(across(.cols = c(time_hour), .fns = ymd_hms))
# A tibble: 336,776 × 1
   time_hour          
   <dttm>             
 1 2013-01-01 05:00:00
 2 2013-01-01 05:00:00
 3 2013-01-01 05:00:00
 4 2013-01-01 05:00:00
 5 2013-01-01 06:00:00
 6 2013-01-01 05:00:00
 7 2013-01-01 06:00:00
 8 2013-01-01 06:00:00
 9 2013-01-01 06:00:00
10 2013-01-01 06:00:00
# … with 336,766 more rows

Collapsing

Collapse all character variables into 5 categories and an “other” category based on frequency

# Keep the 5 most frequent values of each character column and pool the
# rest into a single "forgotten category" level (columns become factors).
# A lambda is used because passing extra arguments through across()'s `...`
# is deprecated as of dplyr 1.1.0.
starwars %>%
  mutate(across(
    where(is.character),
    ~ fct_lump_n(.x, n = 5, other_level = "forgotten category")
  ))
# A tibble: 87 × 14
   name     height  mass hair_color skin_color eye_color birth_year sex   gender
   <fct>     <int> <dbl> <fct>      <fct>      <fct>          <dbl> <fct> <fct> 
 1 Luke Sk…    172    77 blond      fair       blue            19   male  mascu…
 2 C-3PO       167    75 <NA>       forgotten… yellow         112   none  mascu…
 3 R2-D2        96    32 <NA>       forgotten… forgotte…       33   none  mascu…
 4 Darth V…    202   136 none       forgotten… yellow          41.9 male  mascu…
 5 Leia Or…    150    49 brown      light      brown           19   fema… femin…
 6 Owen La…    178   120 forgotten… light      blue            52   male  mascu…
 7 Beru Wh…    165    75 brown      light      blue            47   fema… femin…
 8 R5-D4        97    32 <NA>       forgotten… forgotte…       NA   none  mascu…
 9 Biggs D…    183    84 black      light      brown           24   male  mascu…
10 Obi-Wan…    182    77 forgotten… fair       forgotte…       57   male  mascu…
# … with 77 more rows, and 5 more variables: homeworld <fct>, species <fct>,
#   films <list>, vehicles <list>, starships <list>

Renaming

Add a prefix to all variables of a particular type

# Prefix the name of every factor column with "test_".
iris %>%
  rename_with(
    .fn = function(nm) str_c("test_", nm),
    .cols = where(is.factor)
  )
# A tibble: 150 × 5
   Sepal.Length Sepal.Width Petal.Length Petal.Width test_Species
          <dbl>       <dbl>        <dbl>       <dbl> <fct>       
 1          5.1         3.5          1.4         0.2 setosa      
 2          4.9         3            1.4         0.2 setosa      
 3          4.7         3.2          1.3         0.2 setosa      
 4          4.6         3.1          1.5         0.2 setosa      
 5          5           3.6          1.4         0.2 setosa      
 6          5.4         3.9          1.7         0.4 setosa      
 7          4.6         3.4          1.4         0.3 setosa      
 8          5           3.4          1.5         0.2 setosa      
 9          4.4         2.9          1.4         0.2 setosa      
10          4.9         3.1          1.5         0.1 setosa      
# … with 140 more rows

Add a prefix to all variable names

# Prefix every column name with "prefix_".
mtcars %>%
  rename_with(
    .fn = function(nm) paste0("prefix_", nm),
    .cols = everything()
  )
# A tibble: 32 × 11
   prefix_mpg prefix_cyl prefix_disp prefix_hp prefix_drat prefix_wt prefix_qsec
        <dbl>      <dbl>       <dbl>     <dbl>       <dbl>     <dbl>       <dbl>
 1       21            6        160        110        3.9       2.62        16.5
 2       21            6        160        110        3.9       2.88        17.0
 3       22.8          4        108         93        3.85      2.32        18.6
 4       21.4          6        258        110        3.08      3.22        19.4
 5       18.7          8        360        175        3.15      3.44        17.0
 6       18.1          6        225        105        2.76      3.46        20.2
 7       14.3          8        360        245        3.21      3.57        15.8
 8       24.4          4        147.        62        3.69      3.19        20  
 9       22.8          4        141.        95        3.92      3.15        22.9
10       19.2          6        168.       123        3.92      3.44        18.3
# … with 22 more rows, and 4 more variables: prefix_vs <dbl>, prefix_am <dbl>,
#   prefix_gear <dbl>, prefix_carb <dbl>

Limit variable names to a particular length

# Truncate every column name to its first three characters.
starwars %>%
  set_names(nm = substr(names(.), start = 1, stop = 3))
# A tibble: 87 × 14
   nam     hei   mas hai   ski   eye     bir sex   gen   hom   spe   fil   veh  
   <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <lis> <lis>
 1 Luke…   172    77 blond fair  blue   19   male  masc… Tato… Human <chr> <chr>
 2 C-3PO   167    75 <NA>  gold  yell… 112   none  masc… Tato… Droid <chr> <chr>
 3 R2-D2    96    32 <NA>  whit… red    33   none  masc… Naboo Droid <chr> <chr>
 4 Dart…   202   136 none  white yell…  41.9 male  masc… Tato… Human <chr> <chr>
 5 Leia…   150    49 brown light brown  19   fema… femi… Alde… Human <chr> <chr>
 6 Owen…   178   120 brow… light blue   52   male  masc… Tato… Human <chr> <chr>
 7 Beru…   165    75 brown light blue   47   fema… femi… Tato… Human <chr> <chr>
 8 R5-D4    97    32 <NA>  whit… red    NA   none  masc… Tato… Droid <chr> <chr>
 9 Bigg…   183    84 black light brown  24   male  masc… Tato… Human <chr> <chr>
10 Obi-…   182    77 aubu… fair  blue…  57   male  masc… Stew… Human <chr> <chr>
# … with 77 more rows, and 1 more variable: sta <list>

Creating

Duplicate each row “X” number of times

# Repeat each row three times (a constant weight applies to every row).
iris %>%
  uncount(weights = 3)
# A tibble: 450 × 5
   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
          <dbl>       <dbl>        <dbl>       <dbl> <fct>  
 1          5.1         3.5          1.4         0.2 setosa 
 2          5.1         3.5          1.4         0.2 setosa 
 3          5.1         3.5          1.4         0.2 setosa 
 4          4.9         3            1.4         0.2 setosa 
 5          4.9         3            1.4         0.2 setosa 
 6          4.9         3            1.4         0.2 setosa 
 7          4.7         3.2          1.3         0.2 setosa 
 8          4.7         3.2          1.3         0.2 setosa 
 9          4.7         3.2          1.3         0.2 setosa 
10          4.6         3.1          1.5         0.2 setosa 
# … with 440 more rows

Create an ID variable based on row number

# Prepend an integer "id" column holding each row's position.
iris %>%
  rowid_to_column(var = "id")
# A tibble: 150 × 6
      id Sepal.Length Sepal.Width Petal.Length Petal.Width Species
   <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>  
 1     1          5.1         3.5          1.4         0.2 setosa 
 2     2          4.9         3            1.4         0.2 setosa 
 3     3          4.7         3.2          1.3         0.2 setosa 
 4     4          4.6         3.1          1.5         0.2 setosa 
 5     5          5           3.6          1.4         0.2 setosa 
 6     6          5.4         3.9          1.7         0.4 setosa 
 7     7          4.6         3.4          1.4         0.3 setosa 
 8     8          5           3.4          1.5         0.2 setosa 
 9     9          4.4         2.9          1.4         0.2 setosa 
10    10          4.9         3.1          1.5         0.1 setosa 
# … with 140 more rows

Create an ID variable based on row number - 2nd method

# Append an integer "id" column holding each row's position.
iris %>%
  mutate(id = seq_len(n()))
# A tibble: 150 × 6
   Sepal.Length Sepal.Width Petal.Length Petal.Width Species    id
          <dbl>       <dbl>        <dbl>       <dbl> <fct>   <int>
 1          5.1         3.5          1.4         0.2 setosa      1
 2          4.9         3            1.4         0.2 setosa      2
 3          4.7         3.2          1.3         0.2 setosa      3
 4          4.6         3.1          1.5         0.2 setosa      4
 5          5           3.6          1.4         0.2 setosa      5
 6          5.4         3.9          1.7         0.4 setosa      6
 7          4.6         3.4          1.4         0.3 setosa      7
 8          5           3.4          1.5         0.2 setosa      8
 9          4.4         2.9          1.4         0.2 setosa      9
10          4.9         3.1          1.5         0.1 setosa     10
# … with 140 more rows

Create unique identifiers based on other variables

# Give every distinct (cyl, carb) combination its own integer id.
# cur_group_id() returns the index of the current group, so all rows that
# share the same cyl and carb values receive the same id.
mtcars %>% 
  group_by(cyl, carb) %>% 
  mutate(id = cur_group_id()) %>% 
  # Drop the grouping so later steps are not computed per group.
  ungroup()
# A tibble: 32 × 12
     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb    id
   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
 1  21       6  160    110  3.9   2.62  16.5     0     1     4     4     4
 2  21       6  160    110  3.9   2.88  17.0     0     1     4     4     4
 3  22.8     4  108     93  3.85  2.32  18.6     1     1     4     1     1
 4  21.4     6  258    110  3.08  3.22  19.4     1     0     3     1     3
 5  18.7     8  360    175  3.15  3.44  17.0     0     0     3     2     6
 6  18.1     6  225    105  2.76  3.46  20.2     1     0     3     1     3
 7  14.3     8  360    245  3.21  3.57  15.8     0     0     3     4     8
 8  24.4     4  147.    62  3.69  3.19  20       1     0     4     2     2
 9  22.8     4  141.    95  3.92  3.15  22.9     1     0     4     2     2
10  19.2     6  168.   123  3.92  3.44  18.3     1     0     4     4     4
# … with 22 more rows

Create quantiles

# Rank rows by Sepal.Length and split them into four buckets (1 = lowest).
iris %>%
  mutate(quartiles = ntile(x = Sepal.Length, n = 4))
# A tibble: 150 × 6
   Sepal.Length Sepal.Width Petal.Length Petal.Width Species quartiles
          <dbl>       <dbl>        <dbl>       <dbl> <fct>       <int>
 1          5.1         3.5          1.4         0.2 setosa          1
 2          4.9         3            1.4         0.2 setosa          1
 3          4.7         3.2          1.3         0.2 setosa          1
 4          4.6         3.1          1.5         0.2 setosa          1
 5          5           3.6          1.4         0.2 setosa          1
 6          5.4         3.9          1.7         0.4 setosa          2
 7          4.6         3.4          1.4         0.3 setosa          1
 8          5           3.4          1.5         0.2 setosa          1
 9          4.4         2.9          1.4         0.2 setosa          1
10          4.9         3.1          1.5         0.1 setosa          1
# … with 140 more rows

Altering

Reverse 0s and 1s in a vector

# Flip the binary indicator: 0 becomes 1 and 1 becomes 0.
# Any other value matches neither condition and becomes NA.
mtcars %>%
  mutate(
    vs = case_when(
      vs == 0 ~ 1,
      vs == 1 ~ 0
    )
  )
# A tibble: 32 × 11
     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1  21       6  160    110  3.9   2.62  16.5     1     1     4     4
 2  21       6  160    110  3.9   2.88  17.0     1     1     4     4
 3  22.8     4  108     93  3.85  2.32  18.6     0     1     4     1
 4  21.4     6  258    110  3.08  3.22  19.4     0     0     3     1
 5  18.7     8  360    175  3.15  3.44  17.0     1     0     3     2
 6  18.1     6  225    105  2.76  3.46  20.2     0     0     3     1
 7  14.3     8  360    245  3.21  3.57  15.8     1     0     3     4
 8  24.4     4  147.    62  3.69  3.19  20       0     0     4     2
 9  22.8     4  141.    95  3.92  3.15  22.9     0     0     4     2
10  19.2     6  168.   123  3.92  3.44  18.3     0     0     4     4
# … with 22 more rows

Replace all particular values in a dataframe with another value

# Swap every 0 for the label "Zero" in all columns. Mixing in a string
# coerces each column to character, as the <chr> types in the output show.
mtcars %>%
  mutate(across(
    .cols = everything(),
    .fns = function(col) replace(col, col == 0, "Zero")
  ))
# A tibble: 32 × 11
   mpg   cyl   disp  hp    drat  wt    qsec  vs    am    gear  carb 
   <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
 1 21    6     160   110   3.9   2.62  16.46 Zero  1     4     4    
 2 21    6     160   110   3.9   2.875 17.02 Zero  1     4     4    
 3 22.8  4     108   93    3.85  2.32  18.61 1     1     4     1    
 4 21.4  6     258   110   3.08  3.215 19.44 1     Zero  3     1    
 5 18.7  8     360   175   3.15  3.44  17.02 Zero  Zero  3     2    
 6 18.1  6     225   105   2.76  3.46  20.22 1     Zero  3     1    
 7 14.3  8     360   245   3.21  3.57  15.84 Zero  Zero  3     4    
 8 24.4  4     146.7 62    3.69  3.19  20    1     Zero  4     2    
 9 22.8  4     140.8 95    3.92  3.15  22.9  1     Zero  4     2    
10 19.2  6     167.6 123   3.92  3.44  18.3  1     Zero  4     4    
# … with 22 more rows

Round a single variable to 1 decimal place

# Round the wt column to one decimal place; other columns are untouched.
mtcars %>%
  mutate(wt = round(wt, digits = 1))
# A tibble: 32 × 11
     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1  21       6  160    110  3.9    2.6  16.5     0     1     4     4
 2  21       6  160    110  3.9    2.9  17.0     0     1     4     4
 3  22.8     4  108     93  3.85   2.3  18.6     1     1     4     1
 4  21.4     6  258    110  3.08   3.2  19.4     1     0     3     1
 5  18.7     8  360    175  3.15   3.4  17.0     0     0     3     2
 6  18.1     6  225    105  2.76   3.5  20.2     1     0     3     1
 7  14.3     8  360    245  3.21   3.6  15.8     0     0     3     4
 8  24.4     4  147.    62  3.69   3.2  20       1     0     4     2
 9  22.8     4  141.    95  3.92   3.1  22.9     1     0     4     2
10  19.2     6  168.   123  3.92   3.4  18.3     1     0     4     4
# … with 22 more rows

Round all variables to 1 decimal place

# Round every numeric column to one decimal place.
# janitor::adorn_rounding() leaves the first column alone by default (it
# assumes a label column), so across() is used to cover all of them.
mtcars %>%
  mutate(across(where(is.numeric), ~ round(.x, digits = 1)))
# A tibble: 32 × 11
     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1  21       6  160    110   3.9   2.6  16.5     0     1     4     4
 2  21       6  160    110   3.9   2.9  17       0     1     4     4
 3  22.8     4  108     93   3.9   2.3  18.6     1     1     4     1
 4  21.4     6  258    110   3.1   3.2  19.4     1     0     3     1
 5  18.7     8  360    175   3.1   3.4  17       0     0     3     2
 6  18.1     6  225    105   2.8   3.5  20.2     1     0     3     1
 7  14.3     8  360    245   3.2   3.6  15.8     0     0     3     4
 8  24.4     4  147.    62   3.7   3.2  20       1     0     4     2
 9  22.8     4  141.    95   3.9   3.1  22.9     1     0     4     2
10  19.2     6  168.   123   3.9   3.4  18.3     1     0     4     4
# … with 22 more rows

Selecting/sorting

Select variables according to the number of distinct levels within that variable

# Keep only the columns that have at least 20 distinct values.
starwars %>%
  select(where(function(col) n_distinct(col) >= 20))
# A tibble: 87 × 8
   name               height  mass skin_color birth_year homeworld species films
   <chr>               <int> <dbl> <chr>           <dbl> <chr>     <chr>   <lis>
 1 Luke Skywalker        172    77 fair             19   Tatooine  Human   <chr>
 2 C-3PO                 167    75 gold            112   Tatooine  Droid   <chr>
 3 R2-D2                  96    32 white, bl…       33   Naboo     Droid   <chr>
 4 Darth Vader           202   136 white            41.9 Tatooine  Human   <chr>
 5 Leia Organa           150    49 light            19   Alderaan  Human   <chr>
 6 Owen Lars             178   120 light            52   Tatooine  Human   <chr>
 7 Beru Whitesun lars    165    75 light            47   Tatooine  Human   <chr>
 8 R5-D4                  97    32 white, red       NA   Tatooine  Droid   <chr>
 9 Biggs Darklighter     183    84 light            24   Tatooine  Human   <chr>
10 Obi-Wan Kenobi        182    77 fair             57   Stewjon   Human   <chr>
# … with 77 more rows

Select variables based on their degree of missingness (here, keep those with less than 5% missing values)

# Keep only the columns in which fewer than 5% of the values are missing.
airquality %>%
  keep(function(col) sum(is.na(col)) / length(col) * 100 < 5)
# A tibble: 153 × 5
   Solar.R  Wind  Temp Month   Day
     <int> <dbl> <int> <int> <int>
 1     190   7.4    67     5     1
 2     118   8      72     5     2
 3     149  12.6    74     5     3
 4     313  11.5    62     5     4
 5      NA  14.3    56     5     5
 6      NA  14.9    66     5     6
 7     299   8.6    65     5     7
 8      99  13.8    59     5     8
 9      19  20.1    61     5     9
10     194   8.6    69     5    10
# … with 143 more rows

Select variables whose names contain a certain pattern

# Keep the columns whose names include the literal text "color".
starwars %>%
  select(contains(match = "color"))
# A tibble: 87 × 3
   hair_color    skin_color  eye_color
   <chr>         <chr>       <chr>    
 1 blond         fair        blue     
 2 <NA>          gold        yellow   
 3 <NA>          white, blue red      
 4 none          white       yellow   
 5 brown         light       brown    
 6 brown, grey   light       blue     
 7 brown         light       blue     
 8 <NA>          white, red  red      
 9 black         light       brown    
10 auburn, white fair        blue-gray
# … with 77 more rows

Sort variables alphabetically

# Reorder the columns so their names are in alphabetical order.
mtcars %>%
  select(all_of(sort(names(.))))
# A tibble: 32 × 11
      am  carb   cyl  disp  drat  gear    hp   mpg  qsec    vs    wt
   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1     1     4     6  160   3.9      4   110  21    16.5     0  2.62
 2     1     4     6  160   3.9      4   110  21    17.0     0  2.88
 3     1     1     4  108   3.85     4    93  22.8  18.6     1  2.32
 4     0     1     6  258   3.08     3   110  21.4  19.4     1  3.22
 5     0     2     8  360   3.15     3   175  18.7  17.0     0  3.44
 6     0     1     6  225   2.76     3   105  18.1  20.2     1  3.46
 7     0     4     8  360   3.21     3   245  14.3  15.8     0  3.57
 8     0     2     4  147.  3.69     4    62  24.4  20       1  3.19
 9     0     2     4  141.  3.92     4    95  22.8  22.9     1  3.15
10     0     4     6  168.  3.92     4   123  19.2  18.3     1  3.44
# … with 22 more rows