Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/fission fusion #78

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 92 additions & 0 deletions R/fusion_id.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#' Identify fusion events
#'
#' @param edges edges generated by edge_dist
#' @param threshold spatial distance threshold in the units of the projection
#' @param n_min_length minimum length of fusion events
#' @param n_max_missing maximum number of missing observations within a fusion event
#' @param allow_split boolean if a single observation can be greater than the
#' threshold distance without initiating fission event
fusion_id <- function(edges,
threshold = 50,
n_min_length = 0,
n_max_missing = 0,
allow_split = FALSE) {

stopifnot('dyadID' %in% colnames(edges))
stopifnot('timegroup' %in% colnames(edges))
stopifnot('distance' %in% colnames(edges))

stopifnot(is.numeric(threshold))
stopifnot(is.numeric(n_min_length))
stopifnot(is.numeric(n_max_missing))

stopifnot(threshold >= 0)

unique_edges <- unique(edges[, .(dyadID, timegroup, distance)])

setorder(unique_edges, 'timegroup')

# Check if edge distance less than threshold
unique_edges[, within := distance < threshold]

# If allow split, check if previously within threshold, and
# difference between before, after timegroups is only 1
if (allow_split) {
unique_edges[, within := data.table::fifelse(within | timegroup == min(timegroup),
within,
data.table::shift(within, -1) & data.table::shift(within, 1) &
timegroup - data.table::shift(timegroup, 1) == 1),
by = dyadID]
}

# Runs of within
unique_edges[, within_rleid := data.table::rleid(within), by = dyadID]
unique_edges[!(within), within_rleid := NA_integer_]

# Check timegroup difference, unless first obs for dyad
unique_edges[, tg_diff := data.table::fifelse(within,
timegroup - data.table::shift(timegroup, 1) <= 1 |
timegroup == min(timegroup),
NA),
by = dyadID]

# If missing obs allowed, adjust timegroup difference to allow as long as
# previously within threshold distance
if (n_max_missing > 0) {
unique_edges[, tg_diff := data.table::fifelse(tg_diff,
tg_diff,
data.table::shift(within, 1) &
(timegroup - data.table::shift(timegroup, 1)) <=
(1 + n_max_missing)),
by = dyadID]
}

# Get runs on within and timegroup difference. Adjust if runs of isolated
# observations together (eg. within T, T but timegroup diff F, F)
unique_edges[(within), both_rleid := data.table::rleid(within_rleid, tg_diff), by = dyadID]
unique_edges[(within) & !(tg_diff),
both_rleid := (both_rleid + seq.int(.N)) * -1,
by = dyadID]

# Correct if (looking forward) the loc is part of a new fusion run
unique_edges[, both_rleid := data.table::fifelse(
timegroup - data.table::shift(timegroup, - 1) == -1 &
within & !(tg_diff),
data.table::shift(both_rleid, -1),
both_rleid),
by = dyadID]

# If n minimum length > 0, check nrows and return NA if less than min
if (n_min_length > 0) {
unique_edges[!is.na(both_rleid),
both_rleid := data.table::fifelse(.N >= n_min_length, both_rleid, NA_integer_),
by = .(dyadID, both_rleid)]
}

# Set fusion id on runs and dyad id
unique_edges[!is.na(both_rleid), fusionID := .GRP, by = .(dyadID, both_rleid)]

# Merge fusion id onto input edges
edges[unique_edges, fusionID := fusionID, on = .(timegroup, dyadID)]
return(edges)
}
Loading