From e86ebdd20bcb0d3878f789194abd5a6aaa7ffd5a Mon Sep 17 00:00:00 2001 From: Sergio Correia Date: Wed, 31 Jan 2018 14:50:12 -0500 Subject: [PATCH] Try to improve some corner cases when checking convergence Two main changes: 1. If SSR is very close to zero, it gets edited to zero. Otherwise, on cases with perfect or near perfect collinearity, we might fail to converge as we end up in a case where SSR / PreviouSSR = 0 / 0 (or close to it) 2. Convergence should be invariant to rescaling of the weights. For instance, if we use pw=weightvar we should get the same result than if we use pw=1000*weightvar. However, because convergence is based on the weighted SSR, this is not the case. This commit fixes the cases with weights below 1, so setting weights based of pop in thousands will not give less accurate results than using pop in units. However, we didn't do the other case (where making all weights larger improves the accuracy) --- src/reghdfe.ado | 2 +- src/reghdfe_accelerations.mata | 18 +++++++++++++----- src/reghdfe_class.mata | 16 ++++++++++++++-- src/reghdfe_common.mata | 1 + 4 files changed, 29 insertions(+), 8 deletions(-) diff --git a/src/reghdfe.ado b/src/reghdfe.ado index 1d6aab0..2b8aeb3 100644 --- a/src/reghdfe.ado +++ b/src/reghdfe.ado @@ -1,4 +1,4 @@ -*! version 4.4.12 10nov2017 +*! version 4.4.13 31jan2018 program reghdfe, eclass * Intercept old+version diff --git a/src/reghdfe_accelerations.mata b/src/reghdfe_accelerations.mata index 27e3a86..e3b3f5e 100644 --- a/src/reghdfe_accelerations.mata +++ b/src/reghdfe_accelerations.mata @@ -210,6 +210,7 @@ mata: `Boolean' check_convergence(`FixedEffects' S, `Integer' iter, `Variables' y_new, `Variables' y_old,| `String' method) { `Boolean' is_last_iter `Real' update_error + `Real' eps_threshold // max() ensures that the result when bunching vars is at least as good as when not bunching if (args()<5) method = "vectors" @@ -224,8 +225,15 @@ mata: else if (method=="hestenes") { // If the regressor is perfectly explained by the absvars, we can have SSR very close to zero but negative // (so sqrt is missing) - // todo: add weights to hestenes (S.weight , as in "vectors" method) - update_error = max(safe_divide( sqrt(y_new) , editmissing(sqrt(y_old), sqrt(epsilon(1)) ) , sqrt(epsilon(1)) )) + + eps_threshold = 1e-13 // 100 * epsilon(1) + if (S.verbose > 0 & any(y_new :< eps_threshold)) { + printf("{txt} note: eps. is very close to zero (%g), so hestenes assumed convergence to avoid numerical precision errors\n", min(y_new)) + } + update_error = safe_divide(edittozerotol(y_new, eps_threshold ), + editmissing(y_old, epsilon(1)), + epsilon(1) ) + update_error = sqrt(max(update_error)) } else { exit(error(100)) @@ -276,15 +284,15 @@ mata: // We are using cross instead of quadcross but it should not matter for this use if (S.has_weights) { if (cols(x) < 14) { - return(cross(x :* y, S.weight)') + return(quadcross(x :* y, S.weight)') } else { - return(diagonal(cross(x, S.weight, y))') + return(diagonal(quadcross(x, S.weight, y))') } } else { if (cols(x) < 25) { - return(diagonal(cross(x, y))') + return(diagonal(quadcross(x, y))') } else { return(colsum(x :* y)) diff --git a/src/reghdfe_class.mata b/src/reghdfe_class.mata index 147e166..419c87f 100644 --- a/src/reghdfe_class.mata +++ b/src/reghdfe_class.mata @@ -151,7 +151,7 @@ class FixedEffects `Void' post() `FixedEffects' reload() // create new instance of object - //LSMR-Specific Methods + // LSMR-Specific Methods `Real' lsmr_norm() `Vector' lsmr_A_mult() `Vector' lsmr_At_mult() @@ -231,6 +231,7 @@ class FixedEffects `Void' FixedEffects::update_sorted_weights(`Variable' weight) { `Integer' g + `Real' min_w `Variable' w `FactorPointer' pf @@ -241,6 +242,17 @@ class FixedEffects if (verbose > 0) printf("{txt} - sorting weight for factor {res}%s{txt}\n", absvars[g]) pf = &(factors[g]) w = (*pf).sort(weight) + + // Rescale weights so there are no weights below 1 + if (weight_type != "fweight") { + min_w = colmin(w) + //assert_msg(min_w > 0, "weights must be positive") + //if (min_w <= 0) printf("{err} not all weights are positive\n") + if (0 < min_w & min_w < 1) { + w = w :/ min_w + } + } + asarray((*pf).extra, "weights", w) asarray((*pf).extra, "weighted_counts", `panelsum'(w, (*pf).info)) } @@ -474,7 +486,7 @@ class FixedEffects if (timeit) timer_off(62) if (prune) { - assert(G==2) + assert_msg(G==2, "prune option requires only two FEs") if (timeit) timer_on(63) _expand_1core(y) if (timeit) timer_off(63) diff --git a/src/reghdfe_common.mata b/src/reghdfe_common.mata index 84b0c71..4c585ac 100644 --- a/src/reghdfe_common.mata +++ b/src/reghdfe_common.mata @@ -58,6 +58,7 @@ mata: // Divide two row vectors but adjust the denominator if it's too small // -------------------------------------------------------------------------- `RowVector' safe_divide(`RowVector' numerator, `RowVector' denominator, | `Real' epsi) { + // If the numerator goes below machine precision, we lose accuracy // If the denominator goes below machine precision, the division explodes if (args()<3 | epsi==.) epsi = epsilon(1) return( numerator :/ colmax(denominator \ J(1,cols(denominator),epsi)) )