Replace every occurrence of a particular value in a data frame with another value

# Swap "setosa" for "Best flower" in every column of iris. Because
# str_replace() returns character vectors, every column comes back as <chr>.
iris %>%
  mutate(
    across(
      everything(),
      function(col) str_replace(col, "setosa", "Best flower")
    )
  )
# A tibble: 150 × 5
   Sepal.Length Sepal.Width Petal.Length Petal.Width Species    
   <chr>        <chr>       <chr>        <chr>       <chr>      
 1 5.1          3.5         1.4          0.2         Best flower
 2 4.9          3           1.4          0.2         Best flower
 3 4.7          3.2         1.3          0.2         Best flower
 4 4.6          3.1         1.5          0.2         Best flower
 5 5            3.6         1.4          0.2         Best flower
 6 5.4          3.9         1.7          0.4         Best flower
 7 4.6          3.4         1.4          0.3         Best flower
 8 5            3.4         1.5          0.2         Best flower
 9 4.4          2.9         1.4          0.2         Best flower
10 4.9          3.1         1.5          0.1         Best flower
# … with 140 more rows

Convert all observations that contain a particular character to missing data

# Set every character value that contains a hyphen to NA; only the
# character columns are touched.
starwars %>%
  mutate(
    across(
      where(is.character),
      function(col) {
        col[str_detect(col, "-")] <- NA
        col
      }
    )
  )
# A tibble: 87 × 14
   name     height  mass hair_color skin_color eye_color birth_year sex   gender
   <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
 1 Luke Sk…    172    77 blond      fair       blue            19   male  mascu…
 2 <NA>        167    75 <NA>       gold       yellow         112   none  mascu…
 3 <NA>         96    32 <NA>       white, bl… red             33   none  mascu…
 4 Darth V…    202   136 none       white      yellow          41.9 male  mascu…
 5 Leia Or…    150    49 brown      light      brown           19   fema… femin…
 6 Owen La…    178   120 brown, gr… light      blue            52   male  mascu…
 7 Beru Wh…    165    75 brown      light      blue            47   fema… femin…
 8 <NA>         97    32 <NA>       white, red red             NA   none  mascu…
 9 Biggs D…    183    84 black      light      brown           24   male  mascu…
10 <NA>        182    77 auburn, w… fair       <NA>            57   male  mascu…
# … with 77 more rows, and 5 more variables: homeworld <chr>, species <chr>,
#   films <list>, vehicles <list>, starships <list>

Convert all observations that do not contain a particular character to missing data

# Keep only the character values that contain a hyphen; every other value
# in a character column becomes NA.
starwars %>%
  mutate(
    across(
      where(is.character),
      function(col) replace(col, str_detect(col, "-", negate = TRUE), NA)
    )
  )
# A tibble: 87 × 14
   name     height  mass hair_color skin_color eye_color birth_year sex   gender
   <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
 1 <NA>        172    77 <NA>       <NA>       <NA>            19   <NA>  <NA>  
 2 C-3PO       167    75 <NA>       <NA>       <NA>           112   <NA>  <NA>  
 3 R2-D2        96    32 <NA>       <NA>       <NA>            33   <NA>  <NA>  
 4 <NA>        202   136 <NA>       <NA>       <NA>            41.9 <NA>  <NA>  
 5 <NA>        150    49 <NA>       <NA>       <NA>            19   <NA>  <NA>  
 6 <NA>        178   120 <NA>       <NA>       <NA>            52   <NA>  <NA>  
 7 <NA>        165    75 <NA>       <NA>       <NA>            47   <NA>  <NA>  
 8 R5-D4        97    32 <NA>       <NA>       <NA>            NA   <NA>  <NA>  
 9 <NA>        183    84 <NA>       <NA>       <NA>            24   <NA>  <NA>  
10 Obi-Wan…    182    77 <NA>       <NA>       blue-gray       57   <NA>  <NA>  
# … with 77 more rows, and 5 more variables: homeworld <chr>, species <chr>,
#   films <list>, vehicles <list>, starships <list>

Add leading 0s if observations are less than a specified length

# Left-pad `name` with "0" until it is 12 characters wide; names that are
# already 12 characters or longer are returned unchanged.
# `pad` is passed as the string "0" (not the number 0) to match the
# single-character string that str_pad() documents.
starwars %>%
  mutate(name = str_pad(name, width = 12, side = "left", pad = "0"))
# A tibble: 87 × 14
   name     height  mass hair_color skin_color eye_color birth_year sex   gender
   <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
 1 Luke Sk…    172    77 blond      fair       blue            19   male  mascu…
 2 0000000…    167    75 <NA>       gold       yellow         112   none  mascu…
 3 0000000…     96    32 <NA>       white, bl… red             33   none  mascu…
 4 0Darth …    202   136 none       white      yellow          41.9 male  mascu…
 5 0Leia O…    150    49 brown      light      brown           19   fema… femin…
 6 000Owen…    178   120 brown, gr… light      blue            52   male  mascu…
 7 Beru Wh…    165    75 brown      light      blue            47   fema… femin…
 8 0000000…     97    32 <NA>       white, red red             NA   none  mascu…
 9 Biggs D…    183    84 black      light      brown           24   male  mascu…
10 Obi-Wan…    182    77 auburn, w… fair       blue-gray       57   male  mascu…
# … with 77 more rows, and 5 more variables: homeworld <chr>, species <chr>,
#   films <list>, vehicles <list>, starships <list>

Add trailing 0s if observations are less than a specified length

# Right-pad `name` with "0" until it is 12 characters wide; names that are
# already 12 characters or longer are returned unchanged.
# `pad` is passed as the string "0" (not the number 0) to match the
# single-character string that str_pad() documents.
starwars %>%
  mutate(name = str_pad(name, width = 12, side = "right", pad = "0"))
# A tibble: 87 × 14
   name     height  mass hair_color skin_color eye_color birth_year sex   gender
   <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
 1 Luke Sk…    172    77 blond      fair       blue            19   male  mascu…
 2 C-3PO00…    167    75 <NA>       gold       yellow         112   none  mascu…
 3 R2-D200…     96    32 <NA>       white, bl… red             33   none  mascu…
 4 Darth V…    202   136 none       white      yellow          41.9 male  mascu…
 5 Leia Or…    150    49 brown      light      brown           19   fema… femin…
 6 Owen La…    178   120 brown, gr… light      blue            52   male  mascu…
 7 Beru Wh…    165    75 brown      light      blue            47   fema… femin…
 8 R5-D400…     97    32 <NA>       white, red red             NA   none  mascu…
 9 Biggs D…    183    84 black      light      brown           24   male  mascu…
10 Obi-Wan…    182    77 auburn, w… fair       blue-gray       57   male  mascu…
# … with 77 more rows, and 5 more variables: homeworld <chr>, species <chr>,
#   films <list>, vehicles <list>, starships <list>

Truncate all observations in a variable to a particular maximum length

# Keep only characters 1 through 4 of each name; names shorter than four
# characters are returned as they are.
starwars %>%
  mutate(
    name = str_sub(name, start = 1, end = 4)
  )
# A tibble: 87 × 14
   name  height  mass hair_color    skin_color eye_color birth_year sex   gender
   <chr>  <int> <dbl> <chr>         <chr>      <chr>          <dbl> <chr> <chr> 
 1 Luke     172    77 blond         fair       blue            19   male  mascu…
 2 C-3P     167    75 <NA>          gold       yellow         112   none  mascu…
 3 R2-D      96    32 <NA>          white, bl… red             33   none  mascu…
 4 Dart     202   136 none          white      yellow          41.9 male  mascu…
 5 Leia     150    49 brown         light      brown           19   fema… femin…
 6 Owen     178   120 brown, grey   light      blue            52   male  mascu…
 7 Beru     165    75 brown         light      blue            47   fema… femin…
 8 R5-D      97    32 <NA>          white, red red             NA   none  mascu…
 9 Bigg     183    84 black         light      brown           24   male  mascu…
10 Obi-     182    77 auburn, white fair       blue-gray       57   male  mascu…
# … with 77 more rows, and 5 more variables: homeworld <chr>, species <chr>,
#   films <list>, vehicles <list>, starships <list>

Remove the first 2 characters from an observation (i.e., start at character 3)

# Drop the first two characters of each name by keeping everything from
# character 3 through the last character (end = -1).
starwars %>%
  mutate(
    name = str_sub(name, start = 3, end = -1)
  )
# A tibble: 87 × 14
   name     height  mass hair_color skin_color eye_color birth_year sex   gender
   <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
 1 ke Skyw…    172    77 blond      fair       blue            19   male  mascu…
 2 3PO         167    75 <NA>       gold       yellow         112   none  mascu…
 3 -D2          96    32 <NA>       white, bl… red             33   none  mascu…
 4 rth Vad…    202   136 none       white      yellow          41.9 male  mascu…
 5 ia Orga…    150    49 brown      light      brown           19   fema… femin…
 6 en Lars     178   120 brown, gr… light      blue            52   male  mascu…
 7 ru Whit…    165    75 brown      light      blue            47   fema… femin…
 8 -D4          97    32 <NA>       white, red red             NA   none  mascu…
 9 ggs Dar…    183    84 black      light      brown           24   male  mascu…
10 i-Wan K…    182    77 auburn, w… fair       blue-gray       57   male  mascu…
# … with 77 more rows, and 5 more variables: homeworld <chr>, species <chr>,
#   films <list>, vehicles <list>, starships <list>

Filter according to the presence of a particular ordered pattern

# Keep only the rows whose name contains the exact character sequence
# "Skywalker".
starwars %>%
  filter(
    str_detect(string = name, pattern = "Skywalker")
  )
# A tibble: 3 × 14
  name      height  mass hair_color skin_color eye_color birth_year sex   gender
  <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
1 Luke Sky…    172    77 blond      fair       blue            19   male  mascu…
2 Anakin S…    188    84 blond      fair       blue            41.9 male  mascu…
3 Shmi Sky…    163    NA black      fair       brown           72   fema… femin…
# … with 5 more variables: homeworld <chr>, species <chr>, films <list>,
#   vehicles <list>, starships <list>

Filter according to the length of an observation

# Keep only the rows whose name is at most five characters long.
starwars %>%
  filter(
    nchar(name, type = "chars") <= 5L
  )
# A tibble: 14 × 14
   name  height  mass hair_color skin_color  eye_color birth_year sex    gender 
   <chr>  <int> <dbl> <chr>      <chr>       <chr>          <dbl> <chr>  <chr>  
 1 C-3PO    167    75 <NA>       gold        yellow           112 none   mascul…
 2 R2-D2     96    32 <NA>       white, blue red               33 none   mascul…
 3 R5-D4     97    32 <NA>       white, red  red               NA none   mascul…
 4 Yoda      66    17 white      green       brown            896 male   mascul…
 5 IG-88    200   140 none       metal       red               15 none   mascul…
 6 Bossk    190   113 none       green       red               53 male   mascul…
 7 Lobot    175    79 none       light       blue              37 male   mascul…
 8 Watto    137    NA black      blue, grey  yellow            NA male   mascul…
 9 Cordé    157    NA brown      light       brown             NA female femini…
10 Dormé    165    NA brown      light       brown             NA female femini…
11 Dooku    193    80 white      fair        brown            102 male   mascul…
12 Finn      NA    NA black      dark        dark              NA male   mascul…
13 Rey       NA    NA brown      light       hazel             NA female femini…
14 BB8       NA    NA none       none        black             NA none   mascul…
# … with 5 more variables: homeworld <chr>, species <chr>, films <list>,
#   vehicles <list>, starships <list>

Remove all letters and spaces from the observations of a particular variable

# Strip every letter and space from `name`. `\\p{L}` matches any Unicode
# letter, so accented characters (e.g. the "é" in "Cordé") are removed too;
# an ASCII-only class such as `[a-zA-Z]` would leave them behind.
starwars %>%
  mutate(name = str_remove_all(name, "[\\p{L} ]"))
# A tibble: 87 × 14
   name  height  mass hair_color    skin_color eye_color birth_year sex   gender
   <chr>  <int> <dbl> <chr>         <chr>      <chr>          <dbl> <chr> <chr> 
 1 ""       172    77 blond         fair       blue            19   male  mascu…
 2 "-3"     167    75 <NA>          gold       yellow         112   none  mascu…
 3 "2-2"     96    32 <NA>          white, bl… red             33   none  mascu…
 4 ""       202   136 none          white      yellow          41.9 male  mascu…
 5 ""       150    49 brown         light      brown           19   fema… femin…
 6 ""       178   120 brown, grey   light      blue            52   male  mascu…
 7 ""       165    75 brown         light      blue            47   fema… femin…
 8 "5-4"     97    32 <NA>          white, red red             NA   none  mascu…
 9 ""       183    84 black         light      brown           24   male  mascu…
10 "-"      182    77 auburn, white fair       blue-gray       57   male  mascu…
# … with 77 more rows, and 5 more variables: homeworld <chr>, species <chr>,
#   films <list>, vehicles <list>, starships <list>

Remove all non-numeric characters from the observations of a particular variable

# Delete everything that is not a digit 0-9, leaving only the numeric
# characters of each name (an empty string when there are none).
starwars %>%
  mutate(
    name = str_remove_all(name, "[^0-9]")
  )
# A tibble: 87 × 14
   name  height  mass hair_color    skin_color eye_color birth_year sex   gender
   <chr>  <int> <dbl> <chr>         <chr>      <chr>          <dbl> <chr> <chr> 
 1 ""       172    77 blond         fair       blue            19   male  mascu…
 2 "3"      167    75 <NA>          gold       yellow         112   none  mascu…
 3 "22"      96    32 <NA>          white, bl… red             33   none  mascu…
 4 ""       202   136 none          white      yellow          41.9 male  mascu…
 5 ""       150    49 brown         light      brown           19   fema… femin…
 6 ""       178   120 brown, grey   light      blue            52   male  mascu…
 7 ""       165    75 brown         light      blue            47   fema… femin…
 8 "54"      97    32 <NA>          white, red red             NA   none  mascu…
 9 ""       183    84 black         light      brown           24   male  mascu…
10 ""       182    77 auburn, white fair       blue-gray       57   male  mascu…
# … with 77 more rows, and 5 more variables: homeworld <chr>, species <chr>,
#   films <list>, vehicles <list>, starships <list>

Remove all numeric characters from the observations of a particular variable

# Delete every digit 0-9 from each name, leaving all other characters
# in place.
starwars %>%
  mutate(
    name = str_remove_all(name, "[0-9]")
  )
# A tibble: 87 × 14
   name     height  mass hair_color skin_color eye_color birth_year sex   gender
   <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
 1 Luke Sk…    172    77 blond      fair       blue            19   male  mascu…
 2 C-PO        167    75 <NA>       gold       yellow         112   none  mascu…
 3 R-D          96    32 <NA>       white, bl… red             33   none  mascu…
 4 Darth V…    202   136 none       white      yellow          41.9 male  mascu…
 5 Leia Or…    150    49 brown      light      brown           19   fema… femin…
 6 Owen La…    178   120 brown, gr… light      blue            52   male  mascu…
 7 Beru Wh…    165    75 brown      light      blue            47   fema… femin…
 8 R-D          97    32 <NA>       white, red red             NA   none  mascu…
 9 Biggs D…    183    84 black      light      brown           24   male  mascu…
10 Obi-Wan…    182    77 auburn, w… fair       blue-gray       57   male  mascu…
# … with 77 more rows, and 5 more variables: homeworld <chr>, species <chr>,
#   films <list>, vehicles <list>, starships <list>