First, we need the Created.At column to be a "real" timestamp; in R, this means a POSIXt object. While it's not always true that we cannot work with strings, in this case since "month" is first, sorting and grabbing the minimum of the dates would take more work trying to deal with parsing the string, it's much easier to convert to a timestamp.
dat$Created.At <- as.POSIXct(dat$Created.At, format = "%m/%d/%Y %H:%M")
head(dat)
# ID Interaction.Type Target.ID Student Created.At
# 1 1 email_sent 368932 Isabel Gauss 2021-10-20 00:02:00
# 2 1 email_sent 370153 Isabel Gauss 2021-11-02 00:04:00
# 3 1 Open_sent 375012 Isabel Gauss 2021-12-06 22:15:00
# 4 1 email_sent 382353 Isabel Gauss 2022-02-03 00:06:00
# 5 50 email_sent 368932 Jen Gonzalez 2021-10-20 00:02:00
# 6 50 email_sent 370153 Jen Gonzalez 2021-11-02 00:04:00
From here, the operations are broken down into two things:
- summarize/filter by group, see Calculate the mean by group, How to sum a variable by group, Apply several summary functions (sum, mean, etc.) on several variables by group in one call
- merge/join, see How to join (merge) data frames (inner, outer, left, right), What's the difference between INNER JOIN, LEFT JOIN, RIGHT JOIN and FULL JOIN?
dplyr
library(dplyr)
dat %>%
slice_min(Created.At, by = Student) %>%
left_join(B, ., by = "ID")
# ID Interaction.Type Target.ID Student Created.At
# 1 1 email_sent 368932 Isabel Gauss 2021-10-20 00:02:00
# 2 125 <NA> NA <NA> <NA>
# 3 200 email_sent 343543 Jason Bin 2021-02-08 01:05:00
# 4 550 email_sent 344546 Brad pit 2021-10-20 00:02:00
# 5 870 <NA> NA <NA> <NA>
base R
tmp <- dat[with(dat, ave(as.numeric(Created.At), Student, FUN = function(z) z == min(z))) > 0,]
tmp
# ID Interaction.Type Target.ID Student Created.At
# 1 1 email_sent 368932 Isabel Gauss 2021-10-20 00:02:00
# 5 50 email_sent 368932 Jen Gonzalez 2021-10-20 00:02:00
# 8 10 email_sent 354609 Isabel Goodwin 2021-09-23 21:09:00
# 13 200 email_sent 343543 Jason Bin 2021-02-08 01:05:00
# 15 550 email_sent 344546 Brad pit 2021-10-20 00:02:00
merge(B, tmp, by = "ID", all.x = TRUE)
# ID Interaction.Type Target.ID Student Created.At
# 1 1 email_sent 368932 Isabel Gauss 2021-10-20 00:02:00
# 2 125 <NA> NA <NA> <NA>
# 3 200 email_sent 343543 Jason Bin 2021-02-08 01:05:00
# 4 550 email_sent 344546 Brad pit 2021-10-20 00:02:00
# 5 870 <NA> NA <NA> <NA>
Data
dat <- structure(list(ID = c(1L, 1L, 1L, 1L, 50L, 50L, 50L, 10L, 10L, 10L, 10L, 200L, 200L, 550L, 550L, 550L), Interaction.Type = c("email_sent", "email_sent", "Open_sent", "email_sent", "email_sent", "email_sent", "email_sent", "email_sent", "email_sent", "email_sent", "Open_sent", "email_sent", "email_sent", "email_sent", "email_sent", "email_sent"), Target.ID = c(368932L, 370153L, 375012L, 382353L, 368932L, 370153L, 375012L, 354609L, 368089L, 375017L, 383095L, 343546L, 343543L, 546354L, 344546L, 343434L ), Student = c("Isabel Gauss", "Isabel Gauss", "Isabel Gauss", "Isabel Gauss", "Jen Gonzalez", "Jen Gonzalez", "Jen Gonzalez", "Isabel Goodwin", "Isabel Goodwin", "Isabel Goodwin", "Isabel Goodwin", "Jason Bin", "Jason Bin", "Brad pit", "Brad pit", "Brad pit"), Created.At = c("10/20/2021 0:02", "11/2/2021 0:04", "12/6/2021 22:15", "2/3/2022 0:06", "10/20/2021 0:02", "11/2/2021 0:04", "12/6/2021 22:15", "9/23/2021 21:09", "10/20/2021 0:02", "12/5/2021 21:05", "2/8/2022 1:05", "10/9/2022 21:05", "2/8/2021 1:05", "05/29/2023 0:02", "10/20/2021 0:02", "08/15/2022 0:02")), class = "data.frame", row.names = c(NA, -16L))
B <- structure(list(ID = c(1L, 125L, 200L, 550L, 870L)), class = "data.frame", row.names = c(NA, -5L))