Here, we show how to subset or filter a column (or vector), dataframe and matrix in R with the subset() and which() functions.

All the functions are from the "base" package.

See also deleting or naming columns and rows.

Summary Functions for Subsetting or Filtering Data Objects in R
Function Usage
subset() Subset or filter data objects
which() Return the index positions of elements that meet a condition (used to subset or filter data objects)

1 Subset or Filter Columns and Vectors in R

Using the columns or vectors below:

col_num = c(2, 4, 6, 8, 10, 12, 14, 16, 18, 20)
col_num
 [1]  2  4  6  8 10 12 14 16 18 20
col_chr = c("ht", "tt", "ht", "th", "hh")
col_chr
[1] "ht" "tt" "ht" "th" "hh"

Select column elements by index number:

# Select index 5
col_num[5]
[1] 10
# Select index 3 to 6
col_num[3:6] 
[1]  6  8 10 12
# Select index 3 and 5
col_chr[c(3, 5)] 
[1] "ht" "hh"

Select column elements by condition:

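Select values greater than 11 (which() returns the index positions of the matching elements, which are then used to extract the values):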

which(col_num > 11)
[1]  6  7  8  9 10
# Select values greater than 11
col_num[which(col_num > 11)] 
[1] 12 14 16 18 20

Select values equal to 12 or “ht”:

which(col_num == 12)
[1] 6
# Select values equal to 12
col_num[which(col_num == 12)] 
[1] 12
which(col_chr == "ht")
[1] 1 3
# Select values equal to "ht"
col_chr[which(col_chr == "ht")] 
[1] "ht" "ht"

2 Subset or Filter Dataframes (or Matrices) in R

Using the dataframe below:

dtf = data.frame(Name = c("Randy", "Sam", "Jay", "Reah"),
                 Team = c("A", "B", "B", "C"),
                 Hand = c("Right", "Left", "Right", "Right"),
                 Points = c(8, 9, 7, 9),
                 Position = c(3, 1, 4, 1))
dtf
   Name Team  Hand Points Position
1 Randy    A Right      8        3
2   Sam    B  Left      9        1
3   Jay    B Right      7        4
4  Reah    C Right      9        1

Select first few rows:

# First two rows
head(dtf, 2) 
   Name Team  Hand Points Position
1 Randy    A Right      8        3
2   Sam    B  Left      9        1

Select last few rows:

# Last three rows
tail(dtf, 3) 
  Name Team  Hand Points Position
2  Sam    B  Left      9        1
3  Jay    B Right      7        4
4 Reah    C Right      9        1

Select a column by name:

subset(dtf, select = c(Position))
  Position
1        3
2        1
3        4
4        1

Or alternatively subset the dataframe column as a vector:

Applies to dataframes only.

dtf$Position
[1] 3 1 4 1

Select some rows or columns by number:

# Select column 4
dtf[, 4] 
[1] 8 9 7 9
# Select rows 2 to 4
dtf[2:4,] 
  Name Team  Hand Points Position
2  Sam    B  Left      9        1
3  Jay    B Right      7        4
4 Reah    C Right      9        1

Select some columns by name, and the rows where a column's value is greater than or equal to a given value:

Applies to dataframes only.

subset(dtf, Points >= 8, select = c(Name, Points))
   Name Points
1 Randy      8
2   Sam      9
4  Reah      9

Or alternatively, select columns by column numbers c(1, 4), and rows by dtf$Points >= 8:

Applies to dataframes only.

dtf[which(dtf$Points >= 8), c(1, 4)]
   Name Points
1 Randy      8
2   Sam      9
4  Reah      9

Exclude some columns by name, and select the rows where a column's value is equal to a given value:

Applies to dataframes only.

subset(dtf, Hand == "Right", select = - c(Team, Points))
   Name  Hand Position
1 Randy Right        3
3   Jay Right        4
4  Reah Right        1

Or alternatively, exclude columns by column numbers c(2, 4), and select rows by dtf$Hand == “Right”:

Applies to dataframes only.

dtf[which(dtf$Hand == "Right"), - c(2, 4)]
   Name  Hand Position
1 Randy Right        3
3   Jay Right        4
4  Reah Right        1

Filter by two or more columns:

subset(dtf, Hand == "Right" & Points > 7)
   Name Team  Hand Points Position
1 Randy    A Right      8        3
4  Reah    C Right      9        1

Or alternatively:

dtf[which(dtf$Hand == "Right" & dtf$Points > 7), ]
   Name Team  Hand Points Position
1 Randy    A Right      8        3
4  Reah    C Right      9        1

Select a contiguous range of columns by name:

subset(dtf, select = Team:Points)
  Team  Hand Points
1    A Right      8
2    B  Left      9
3    B Right      7
4    C Right      9

Or alternatively, select the columns by column numbers (2:4):

dtf[, 2:4]
  Team  Hand Points
1    A Right      8
2    B  Left      9
3    B Right      7
4    C Right      9

Copyright © 2020 - 2024. All Rights Reserved by Stats Codes