Replace particular observations with NAs in a single variable

# Turn the placeholder value "none" in `sex` into a real NA; no other
# column is touched.
mutate(starwars, sex = na_if(sex, "none"))
# A tibble: 87 × 14
   name     height  mass hair_color skin_color eye_color birth_year sex   gender
   <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
 1 Luke Sk…    172    77 blond      fair       blue            19   male  mascu…
 2 C-3PO       167    75 <NA>       gold       yellow         112   <NA>  mascu…
 3 R2-D2        96    32 <NA>       white, bl… red             33   <NA>  mascu…
 4 Darth V…    202   136 none       white      yellow          41.9 male  mascu…
 5 Leia Or…    150    49 brown      light      brown           19   fema… femin…
 6 Owen La…    178   120 brown, gr… light      blue            52   male  mascu…
 7 Beru Wh…    165    75 brown      light      blue            47   fema… femin…
 8 R5-D4        97    32 <NA>       white, red red             NA   <NA>  mascu…
 9 Biggs D…    183    84 black      light      brown           24   male  mascu…
10 Obi-Wan…    182    77 auburn, w… fair       blue-gray       57   male  mascu…
# … with 77 more rows, and 5 more variables: homeworld <chr>, species <chr>,
#   films <list>, vehicles <list>, starships <list>

Replace particular observations with NAs in all character variables

# Apply the same "none" -> NA conversion to every character column at once.
starwars %>%
  mutate(
    across(
      where(is.character),
      function(column) na_if(column, "none")
    )
  )
# A tibble: 87 × 14
   name     height  mass hair_color skin_color eye_color birth_year sex   gender
   <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr> 
 1 Luke Sk…    172    77 blond      fair       blue            19   male  mascu…
 2 C-3PO       167    75 <NA>       gold       yellow         112   <NA>  mascu…
 3 R2-D2        96    32 <NA>       white, bl… red             33   <NA>  mascu…
 4 Darth V…    202   136 <NA>       white      yellow          41.9 male  mascu…
 5 Leia Or…    150    49 brown      light      brown           19   fema… femin…
 6 Owen La…    178   120 brown, gr… light      blue            52   male  mascu…
 7 Beru Wh…    165    75 brown      light      blue            47   fema… femin…
 8 R5-D4        97    32 <NA>       white, red red             NA   <NA>  mascu…
 9 Biggs D…    183    84 black      light      brown           24   male  mascu…
10 Obi-Wan…    182    77 auburn, w… fair       blue-gray       57   male  mascu…
# … with 77 more rows, and 5 more variables: homeworld <chr>, species <chr>,
#   films <list>, vehicles <list>, starships <list>

Explicitly replace all blank (empty-string) values in a dataset with NAs

# Convert empty strings ("") to NA.
# Only character columns can hold "", so the replacement is restricted to
# them. Comparing "" against numeric columns is at best a no-op and, in
# dplyr >= 1.1.0, an error: na_if() casts its second argument to the type
# of the column, and "" cannot be cast to integer/double.
airquality %>%
  mutate(across(where(is.character), ~ na_if(.x, "")))
# A tibble: 153 × 6
   Ozone Solar.R  Wind  Temp Month   Day
   <int>   <int> <dbl> <int> <int> <int>
 1    41     190   7.4    67     5     1
 2    36     118   8      72     5     2
 3    12     149  12.6    74     5     3
 4    18     313  11.5    62     5     4
 5    NA      NA  14.3    56     5     5
 6    28      NA  14.9    66     5     6
 7    23     299   8.6    65     5     7
 8    19      99  13.8    59     5     8
 9     8      19  20.1    61     5     9
10    NA     194   8.6    69     5    10
# … with 143 more rows

Replace NAs with a specified value in a single variable

# Fill the missing Ozone readings with 0; NAs in other columns are kept.
mutate(airquality, Ozone = replace_na(Ozone, replace = 0))
# A tibble: 153 × 6
   Ozone Solar.R  Wind  Temp Month   Day
   <int>   <int> <dbl> <int> <int> <int>
 1    41     190   7.4    67     5     1
 2    36     118   8      72     5     2
 3    12     149  12.6    74     5     3
 4    18     313  11.5    62     5     4
 5     0      NA  14.3    56     5     5
 6    28      NA  14.9    66     5     6
 7    23     299   8.6    65     5     7
 8    19      99  13.8    59     5     8
 9     8      19  20.1    61     5     9
10     0     194   8.6    69     5    10
# … with 143 more rows

Replace all NAs in a data frame with 0

# is.na() on the whole data frame yields a logical mask of the missing
# cells; replace() writes 0 into exactly those cells.
replace(airquality, is.na(airquality), 0)
# A tibble: 153 × 6
   Ozone Solar.R  Wind  Temp Month   Day
   <dbl>   <dbl> <dbl> <int> <int> <int>
 1    41     190   7.4    67     5     1
 2    36     118   8      72     5     2
 3    12     149  12.6    74     5     3
 4    18     313  11.5    62     5     4
 5     0       0  14.3    56     5     5
 6    28       0  14.9    66     5     6
 7    23     299   8.6    65     5     7
 8    19      99  13.8    59     5     8
 9     8      19  20.1    61     5     9
10     0     194   8.6    69     5    10
# … with 143 more rows

Keep only rows that contain missing data

# Keep a row as soon as at least one of its columns is NA.
filter(airquality, if_any(everything(), is.na))
# A tibble: 42 × 6
   Ozone Solar.R  Wind  Temp Month   Day
   <int>   <int> <dbl> <int> <int> <int>
 1    NA      NA  14.3    56     5     5
 2    28      NA  14.9    66     5     6
 3    NA     194   8.6    69     5    10
 4     7      NA   6.9    74     5    11
 5    NA      66  16.6    57     5    25
 6    NA     266  14.9    58     5    26
 7    NA      NA   8      57     5    27
 8    NA     286   8.6    78     6     1
 9    NA     287   9.7    74     6     2
10    NA     242  16.1    67     6     3
# … with 32 more rows

Filter on a condition but keep the rows where that condition is NA

# filter() drops rows whose condition evaluates to NA, so rows with a
# missing Temp are let through explicitly.
airquality %>%
  filter(Temp < 80 | is.na(Temp))
# A tibble: 80 × 6
   Ozone Solar.R  Wind  Temp Month   Day
   <int>   <int> <dbl> <int> <int> <int>
 1    41     190   7.4    67     5     1
 2    36     118   8      72     5     2
 3    12     149  12.6    74     5     3
 4    18     313  11.5    62     5     4
 5    NA      NA  14.3    56     5     5
 6    28      NA  14.9    66     5     6
 7    23     299   8.6    65     5     7
 8    19      99  13.8    59     5     8
 9     8      19  20.1    61     5     9
10    NA     194   8.6    69     5    10
# … with 70 more rows

Remove empty rows

# Drop empty rows from mtcars; columns are left alone.
# NOTE(review): remove_empty() is presumably janitor's -- confirm the
# package is attached.
remove_empty(mtcars, which = "rows")
# A tibble: 32 × 11
     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1  21       6  160    110  3.9   2.62  16.5     0     1     4     4
 2  21       6  160    110  3.9   2.88  17.0     0     1     4     4
 3  22.8     4  108     93  3.85  2.32  18.6     1     1     4     1
 4  21.4     6  258    110  3.08  3.22  19.4     1     0     3     1
 5  18.7     8  360    175  3.15  3.44  17.0     0     0     3     2
 6  18.1     6  225    105  2.76  3.46  20.2     1     0     3     1
 7  14.3     8  360    245  3.21  3.57  15.8     0     0     3     4
 8  24.4     4  147.    62  3.69  3.19  20       1     0     4     2
 9  22.8     4  141.    95  3.92  3.15  22.9     1     0     4     2
10  19.2     6  168.   123  3.92  3.44  18.3     1     0     4     4
# … with 22 more rows

Keep only variables below a given share of missing values (here: drop variables with 5% or more NAs)

# Drop every column whose percentage of NA values is 5 or higher.
airquality %>%
  discard(function(column) {
    pct_missing <- sum(is.na(column)) / length(column) * 100
    pct_missing >= 5
  })
# A tibble: 153 × 5
   Solar.R  Wind  Temp Month   Day
     <int> <dbl> <int> <int> <int>
 1     190   7.4    67     5     1
 2     118   8      72     5     2
 3     149  12.6    74     5     3
 4     313  11.5    62     5     4
 5      NA  14.3    56     5     5
 6      NA  14.9    66     5     6
 7     299   8.6    65     5     7
 8      99  13.8    59     5     8
 9      19  20.1    61     5     9
10     194   8.6    69     5    10
# … with 143 more rows

Count the NAs per row

# Add `number_missing`: how many NA values each row contains.
# is.na(.) builds a logical matrix of the piped-in data and rowSums()
# counts the TRUEs per row in one vectorized step. This avoids rowwise()
# (one evaluation per row) and cur_data(), which is deprecated as of
# dplyr 1.1.0. as.integer() keeps the count an integer column, matching
# what sum() on a logical vector returns.
airquality %>%
  mutate(number_missing = as.integer(rowSums(is.na(.))))
# A tibble: 153 × 7
   Ozone Solar.R  Wind  Temp Month   Day number_missing
   <int>   <int> <dbl> <int> <int> <int>          <int>
 1    41     190   7.4    67     5     1              0
 2    36     118   8      72     5     2              0
 3    12     149  12.6    74     5     3              0
 4    18     313  11.5    62     5     4              0
 5    NA      NA  14.3    56     5     5              2
 6    28      NA  14.9    66     5     6              1
 7    23     299   8.6    65     5     7              0
 8    19      99  13.8    59     5     8              0
 9     8      19  20.1    61     5     9              0
10    NA     194   8.6    69     5    10              1
# … with 143 more rows

Count the number of NAs in each variable

# Flag every missing cell, then total the flags column by column.
airquality %>%
  is.na() %>%
  colSums()
  Ozone Solar.R    Wind    Temp   Month     Day 
     37       7       0       0       0       0