Skip to content

Commit

Permalink
Add int branch to binsearch
Browse files Browse the repository at this point in the history
The binsearch C function now branches on double and integer types, so
no conversion is done if 'key' and 'vec' are the same type. Cast both
to double if they're not the same type.

Create a struct that contains int and double fields, so we can pass
it to a generic compare function. Create 4 compare functions, one for
each type/start combination. All these components are static inline,
with the hope the compiler can optimize them.

Removing the as.double() call means ISO-8601 range-based subsetting
is nearly twice as fast for integer vectors and keys.

See #100.
  • Loading branch information
joshuaulrich committed Apr 29, 2018
1 parent 7463da7 commit 421366c
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 72 deletions.
6 changes: 5 additions & 1 deletion R/xts.methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,11 @@ window.xts <- function(x, index. = NULL, start = NULL, end = NULL, ...)

# Declare binsearch to call the routine in binsearch.c
binsearch <- function(key, vec, start=TRUE) {
.Call("binsearch", as.double(key), as.double(vec), start, PACKAGE='xts')
# Convert to double if both are not integer
if (storage.mode(key) != storage.mode(vec)) {
storage.mode(key) <- storage.mode(vec) <- "double"
}
.Call("binsearch", key, vec, start, PACKAGE='xts')
}

# Unit tests for the above code may be found in runit.xts.methods.R
152 changes: 81 additions & 71 deletions src/binsearch.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,93 +4,103 @@

/* Binary search range to find interval contributed by Corwin Joy */

/* Find the smallest index for which A[index] >= key */
int lower_bound(double key, double *A, int len_A) {
int mid;
int lo = 0;
int hi = len_A-1;
while (lo < hi) {
mid = lo + (hi - lo) / 2;
if (A[mid] >= key) {
hi = mid;
}
else {
lo = mid + 1;
}
}
static struct keyvec {
double *dvec;
double dkey;
int *ivec;
int ikey;
};

return lo; // lo is the least x for which A[x] >= key is true
typedef int (*bound_comparer)(const struct keyvec, const int);
static inline int
cmp_dbl_upper(const struct keyvec kv, const int i)
{
const double cv = kv.dvec[i];
const double ck = kv.dkey;
return cv > ck;
}

/* Find the smallest index for which A[index] > key */
int upper_bound(double key, double *A, int len_A) {
int mid;
int lo = 0;
int hi = len_A - 1;
while (lo < hi) {
mid = lo + (hi - lo) / 2;
if (A[mid] > key) {
hi = mid;
}
else {
lo = mid + 1;
}
}

return lo; // lo is the least x for which A[x] > key is true
static inline int
cmp_dbl_lower(const struct keyvec kv, const int i)
{
const double cv = kv.dvec[i];
const double ck = kv.dkey;
return cv >= ck;
}

/* bound the key in the array of vals.
if start == true, return lowest index s.t. vals[index] >= key
if start == false, return highest index s.t. vals[index] <= key
*/
int bound(double key, double *vals, int len_vals, int start) {
int item;
if (start) {
item = lower_bound(key, vals, len_vals);
}
else {
item = upper_bound(key, vals, len_vals);
/* if handles edge cases. item may be at the lo/hi end of array */
if (item > 0 && vals[item] > key) --item;
}
return(item);
static inline int
cmp_int_upper(const struct keyvec kv, const int i)
{
const int cv = kv.ivec[i];
const int ck = kv.ikey;
return cv > ck;
}
static inline int
cmp_int_lower(const struct keyvec kv, const int i)
{
const int cv = kv.ivec[i];
const int ck = kv.ikey;
return cv >= ck;
}

SEXP binsearch(SEXP key, SEXP vec, SEXP start)
{
double *rvec;
double *rkey;
int item;
int len_vec = length(vec);

if(!(TYPEOF(vec) == REALSXP && TYPEOF(key) == REALSXP)) {
error("both key and vec must be of type double");
}

if(isNull(start)) {
if (!isLogical(start)) {
error("start must be specified as true or false");
}

if(len_vec < 1) {
if (length(vec) < 1) {
return ScalarInteger(NA_INTEGER);
}

rvec = REAL(vec);
rkey = REAL(key);
item = bound(*rkey, rvec, len_vec, LOGICAL(start)[0]);
int use_start = LOGICAL(start)[0];
bound_comparer cmp_func = NULL;
struct keyvec data;

switch (TYPEOF(vec)) {
case REALSXP:
data.dkey = REAL(key)[0];
data.dvec = REAL(vec);
cmp_func = (use_start) ? cmp_dbl_lower : cmp_dbl_upper;
break;
case INTSXP:
data.ikey = INTEGER(key)[0];
data.ivec = INTEGER(vec);
cmp_func = (use_start) ? cmp_int_lower : cmp_int_upper;
break;
default:
error("unsupported type");
}

int mid;
int lo = 0;
int hi = length(vec) - 1;

while (lo < hi) {
mid = lo + (hi - lo) / 2;
if (cmp_func(data, mid)) {
hi = mid;
}
else {
lo = mid + 1;
}
}

if(LOGICAL(start)[0]) {
if(rvec[item] < *rkey) {
/* start = TRUE, but entire array < key */
return ScalarInteger(NA_INTEGER);
/* handle edge cases where item may be at the lo/hi end of array */
if (use_start) {
/* cmp_func is: vec[lo] >= k */
if (cmp_func(data, lo)) {
lo++;
} else {
lo = NA_INTEGER;
}
} else {
if(rvec[item] > *rkey) {
/* start = FALSE, but entire array > key */
return ScalarInteger(NA_INTEGER);
if (lo > 0 && cmp_func(data, lo)) lo--;
/* cmp_func is: vec[lo] > k */
if (cmp_func(data, lo)) {
lo = NA_INTEGER;
} else {
lo++;
}
}
return ScalarInteger(item+1);
}

return ScalarInteger(lo);
}

0 comments on commit 421366c

Please sign in to comment.