Fix require grad warning for non-leaf tensor in noise tunnel (pytorch#426)

Summary:
This fixes the requires-grad warning for non-leaf tensors related to NoiseTunnel in pytorch#421.
In addition, I moved almost everything in the attribute method under no_grad, which should also help runtime performance.
In `_forward_layer_eval` I had to add a `grad_enabled` flag so that gradients can be enabled externally, as that is also needed in the `test_neuron_gradient.py` test case.
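
The pattern the summary describes, as a minimal standalone sketch (this is not Captum's implementation; the `add_noise` helper, shapes, and defaults below are made up for illustration): perturb the inputs, set requires_grad on the freshly created leaf tensors only when a gradient-based method will consume them, and keep the surrounding bookkeeping under `torch.no_grad()`.

```python
import torch

def add_noise(inputs, stdev=0.1, n_samples=4, needs_grad=True):
    # inputs: tuple of tensors; each is repeated n_samples times and perturbed.
    noisy = []
    for inp in inputs:
        expanded = inp.repeat_interleave(n_samples, dim=0)
        perturbed = expanded + stdev * torch.randn_like(expanded)
        # `perturbed` is a brand-new tensor that does not track gradients yet,
        # so it is a leaf; setting requires_grad here avoids the non-leaf warning.
        if needs_grad:
            perturbed = perturbed.requires_grad_()
        noisy.append(perturbed)
    return tuple(noisy)

with torch.no_grad():
    # Bookkeeping (expanding targets/baselines, averaging results) does not need
    # autograd; gradient-based attribution methods re-enable it internally.
    noisy_inputs = add_noise((torch.rand(2, 3),))
```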

Pull Request resolved: pytorch#426

Reviewed By: vivekmig

Differential Revision: D22500566

Pulled By: NarineK

fbshipit-source-id: d3170e1711012593ff421b964a02e54532a95b13
NarineK authored and facebook-github-bot committed Jul 13, 2020
1 parent d80e857 commit e5ea6fd
Showing 4 changed files with 96 additions and 80 deletions.
22 changes: 15 additions & 7 deletions captum/_utils/gradient.py
@@ -135,13 +135,15 @@ def _forward_layer_eval(
additional_forward_args: Any = None,
device_ids: Union[None, List[int]] = None,
attribute_to_layer_input: bool = False,
grad_enabled: bool = False,
) -> Tuple[Tuple[Tensor, ...], Literal[True, False]]:
return _forward_layer_eval_with_neuron_grads(
forward_fn,
inputs,
layer,
additional_forward_args=additional_forward_args,
gradient_neuron_index=None,
grad_enabled=grad_enabled,
device_ids=device_ids,
attribute_to_layer_input=attribute_to_layer_input,
)
@@ -311,6 +313,7 @@ def _forward_layer_eval_with_neuron_grads(
additional_forward_args: Any = None,
*,
gradient_neuron_index: Union[int, Tuple[int, ...]],
grad_enabled: bool = False,
device_ids: Union[None, List[int]] = None,
attribute_to_layer_input: bool = False,
) -> Tuple[Tuple[Tensor, ...], Tuple[Tensor, ...], Literal[True, False]]:
@@ -324,6 +327,7 @@ def _forward_layer_eval_with_neuron_grads(
layer: Module,
additional_forward_args: Any = None,
gradient_neuron_index: None = None,
grad_enabled: bool = False,
device_ids: Union[None, List[int]] = None,
attribute_to_layer_input: bool = False,
) -> Tuple[Tuple[Tensor, ...], Literal[True, False]]:
@@ -336,6 +340,7 @@ def _forward_layer_eval_with_neuron_grads(
layer: Module,
additional_forward_args: Any = None,
gradient_neuron_index: Union[None, int, Tuple[int, ...]] = None,
grad_enabled: bool = False,
device_ids: Union[None, List[int]] = None,
attribute_to_layer_input: bool = False,
) -> Union[
@@ -357,13 +362,16 @@ def _forward_layer_eval_with_neuron_grads(
evals in a dictionary protected by a lock, analogous to the gather implementation
for the core PyTorch DataParallel implementation.
"""
saved_layer, is_layer_tuple = _forward_layer_distributed_eval(
forward_fn,
inputs,
layer,
additional_forward_args=additional_forward_args,
attribute_to_layer_input=attribute_to_layer_input,
)
grad_enabled = True if gradient_neuron_index is not None or grad_enabled else False

with torch.autograd.set_grad_enabled(grad_enabled):
saved_layer, is_layer_tuple = _forward_layer_distributed_eval(
forward_fn,
inputs,
layer,
additional_forward_args=additional_forward_args,
attribute_to_layer_input=attribute_to_layer_input,
)
device_ids = _extract_device_ids(forward_fn, saved_layer, device_ids)
# Identifies correct device ordering based on device ids.
# key_list is a list of devices in appropriate ordering for concatenation.
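
A hedged sketch of what the new `grad_enabled` flag does in combination with `torch.autograd.set_grad_enabled`: gradients are tracked during the forward pass only if a neuron index is given or the caller opts in. The `run_forward` helper, model, and shapes below are invented for illustration and are not Captum code.

```python
import torch

def run_forward(forward_fn, inputs, gradient_neuron_index=None, grad_enabled=False):
    # Track gradients if a neuron index is requested or the caller opted in.
    grad_enabled = gradient_neuron_index is not None or grad_enabled
    with torch.autograd.set_grad_enabled(grad_enabled):
        return forward_fn(*inputs)

model = torch.nn.Linear(3, 1)
x = torch.rand(4, 3, requires_grad=True)
out_no_grad = run_forward(model, (x,))                  # out_no_grad.grad_fn is None
out_grad = run_forward(model, (x,), grad_enabled=True)  # out_grad.grad_fn is not None
```
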
4 changes: 2 additions & 2 deletions captum/attr/_core/gradient_shap.py
@@ -369,9 +369,9 @@ def _scale_input(
inp_shape = (bsz,) + tuple([1] * len(inp_shape_wo_bsz))

# expand and reshape the indices
rand_coefficient = rand_coefficient.view(inp_shape).requires_grad_()
rand_coefficient = rand_coefficient.view(inp_shape)

input_baseline_scaled = (
rand_coefficient * input + (torch.tensor(1) - rand_coefficient) * baseline
)
).requires_grad_()
return input_baseline_scaled
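
One way to read the `_scale_input` change: `requires_grad_()` is now applied to the freshly built scaled tensor, which is still a leaf, rather than to a coefficient whose downstream arithmetic produces a non-leaf result (and, by default, only leaf tensors accumulate into `.grad`). An illustrative sketch with made-up tensors, not the GradientShap code:

```python
import torch

# Old-style pattern: mark the coefficient, then multiply -> the product is non-leaf.
coeff = torch.rand(4, 1).requires_grad_()
scaled_old = coeff * torch.rand(4, 3)
print(scaled_old.is_leaf, scaled_old.grad_fn is not None)  # False True

# New-style pattern: build the result first (no grad tracking), then mark it.
scaled_new = (torch.rand(4, 1) * torch.rand(4, 3)).requires_grad_()
print(scaled_new.is_leaf, scaled_new.requires_grad)        # True True
```
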
144 changes: 76 additions & 68 deletions captum/attr/_core/noise_tunnel.py
@@ -16,7 +16,7 @@
_format_tensor_into_tuples,
_is_tuple,
)
from .._utils.attribution import Attribution
from .._utils.attribution import Attribution, GradientAttribution
from .._utils.common import _validate_noise_tunnel_type


@@ -63,7 +63,9 @@ def __init__(self, attribution_method: Attribution) -> None:
"""
self.attribution_method = attribution_method
self.is_delta_supported = self.attribution_method.has_convergence_delta()

self.is_gradient_method = isinstance(
self.attribution_method, GradientAttribution
)
Attribution.__init__(self, self.attribution_method.forward_func)

@log_usage()
@@ -165,7 +167,9 @@ def add_noise_to_inputs() -> Tuple[Tensor, ...]:
), "stdevs must be type float. " "Given: {}".format(type(stdevs))
stdevs_ = (stdevs,) * len(inputs)
return tuple(
add_noise_to_input(input, stdev)
add_noise_to_input(input, stdev).requires_grad_()
if self.is_gradient_method
else add_noise_to_input(input, stdev)
for (input, stdev) in zip(inputs, stdevs_)
)

@@ -199,81 +203,85 @@ def compute_expected_attribution_and_sq(attribution):
expected_attribution_sq = torch.mean(attribution ** 2, dim=1, keepdim=False)
return expected_attribution, expected_attribution_sq

# Keeps track whether original input is a tuple or not before
# converting it into a tuple.
is_inputs_tuple = isinstance(inputs, tuple)

inputs = _format_input(inputs)

_validate_noise_tunnel_type(nt_type, SUPPORTED_NOISE_TUNNEL_TYPES)

delta = None
inputs_with_noise = add_noise_to_inputs()
# if the algorithm supports targets, baselines and/or additional_forward_args
# they will be expanded based on the n_steps and corresponding kwargs
# variables will be updated accordingly
_expand_and_update_additional_forward_args(n_samples, kwargs)
_expand_and_update_target(n_samples, kwargs)
_expand_and_update_baselines(
inputs,
n_samples,
kwargs,
draw_baseline_from_distrib=draw_baseline_from_distrib,
)
with torch.no_grad():
# Keeps track whether original input is a tuple or not before
# converting it into a tuple.
is_inputs_tuple = isinstance(inputs, tuple)

inputs = _format_input(inputs)

_validate_noise_tunnel_type(nt_type, SUPPORTED_NOISE_TUNNEL_TYPES)

delta = None
inputs_with_noise = add_noise_to_inputs()
# if the algorithm supports targets, baselines and/or
# additional_forward_args they will be expanded based
# on the n_steps and corresponding kwargs
# variables will be updated accordingly
_expand_and_update_additional_forward_args(n_samples, kwargs)
_expand_and_update_target(n_samples, kwargs)
_expand_and_update_baselines(
inputs,
n_samples,
kwargs,
draw_baseline_from_distrib=draw_baseline_from_distrib,
)

# smoothgrad_Attr(x) = 1 / n * sum(Attr(x + N(0, sigma^2))
# NOTE: using __wrapped__ such that it does not log the inner logs
attributions = self.attribution_method.attribute.__wrapped__( # type: ignore
self.attribution_method, # self
inputs_with_noise if is_inputs_tuple else inputs_with_noise[0],
**kwargs,
)
# smoothgrad_Attr(x) = 1 / n * sum(Attr(x + N(0, sigma^2))
# NOTE: using __wrapped__ such that it does not log the inner logs
attr_func = self.attribution_method.attribute
attributions = attr_func.__wrapped__( # type: ignore
self.attribution_method, # self
inputs_with_noise if is_inputs_tuple else inputs_with_noise[0],
**kwargs,
)

return_convergence_delta = (
"return_convergence_delta" in kwargs and kwargs["return_convergence_delta"]
)
return_convergence_delta = (
"return_convergence_delta" in kwargs
and kwargs["return_convergence_delta"]
)

if self.is_delta_supported and return_convergence_delta:
attributions, delta = attributions
if self.is_delta_supported and return_convergence_delta:
attributions, delta = attributions

is_attrib_tuple = _is_tuple(attributions)
attributions = _format_tensor_into_tuples(attributions)
is_attrib_tuple = _is_tuple(attributions)
attributions = _format_tensor_into_tuples(attributions)

expected_attributions = []
expected_attributions_sq = []
for attribution in attributions:
expected_attr, expected_attr_sq = compute_expected_attribution_and_sq(
attribution
)
expected_attributions.append(expected_attr)
expected_attributions_sq.append(expected_attr_sq)
expected_attributions = []
expected_attributions_sq = []
for attribution in attributions:
expected_attr, expected_attr_sq = compute_expected_attribution_and_sq(
attribution
)
expected_attributions.append(expected_attr)
expected_attributions_sq.append(expected_attr_sq)

if NoiseTunnelType[nt_type] == NoiseTunnelType.smoothgrad:
return self._apply_checks_and_return_attributions(
tuple(expected_attributions),
is_attrib_tuple,
return_convergence_delta,
delta,
)

if NoiseTunnelType[nt_type] == NoiseTunnelType.smoothgrad:
return self._apply_checks_and_return_attributions(
tuple(expected_attributions),
is_attrib_tuple,
return_convergence_delta,
delta,
)
if NoiseTunnelType[nt_type] == NoiseTunnelType.smoothgrad_sq:
return self._apply_checks_and_return_attributions(
tuple(expected_attributions_sq),
is_attrib_tuple,
return_convergence_delta,
delta,
)

if NoiseTunnelType[nt_type] == NoiseTunnelType.smoothgrad_sq:
return self._apply_checks_and_return_attributions(
tuple(expected_attributions_sq),
is_attrib_tuple,
return_convergence_delta,
delta,
vargrad = tuple(
expected_attribution_sq - expected_attribution * expected_attribution
for expected_attribution, expected_attribution_sq in zip(
expected_attributions, expected_attributions_sq
)
)

vargrad = tuple(
expected_attribution_sq - expected_attribution * expected_attribution
for expected_attribution, expected_attribution_sq in zip(
expected_attributions, expected_attributions_sq
return self._apply_checks_and_return_attributions(
vargrad, is_attrib_tuple, return_convergence_delta, delta
)
)

return self._apply_checks_and_return_attributions(
vargrad, is_attrib_tuple, return_convergence_delta, delta
)

def _apply_checks_and_return_attributions(
self,
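
The three reductions at the heart of the block above (smoothgrad, smoothgrad_sq, and vargrad), shown in isolation on a stand-in attribution tensor; the shape and values below are made up, and upstream code is assumed to have already reshaped attributions to (batch, n_samples, ...):

```python
import torch

attribution = torch.rand(8, 5, 3)  # (batch, n_samples, features), stand-in values

expected_attr = attribution.mean(dim=1)             # smoothgrad
expected_attr_sq = (attribution ** 2).mean(dim=1)   # smoothgrad_sq
vargrad = expected_attr_sq - expected_attr * expected_attr

# vargrad is the population variance of the attribution over the noisy samples.
assert torch.allclose(vargrad, attribution.var(dim=1, unbiased=False), atol=1e-6)
```
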
6 changes: 3 additions & 3 deletions tests/attr/neuron/test_neuron_gradient.py
@@ -126,9 +126,9 @@ def _gradient_matching_test_assert(
while len(neuron) < len(out.shape) - 1:
neuron = neuron + (0,)
input_attrib = Saliency(
lambda x: _forward_layer_eval(model, x, output_layer)[0][0][
(slice(None), *neuron)
]
lambda x: _forward_layer_eval(
model, x, output_layer, grad_enabled=True
)[0][0][(slice(None), *neuron)]
)
sal_vals = input_attrib.attribute(test_input, abs=False)
grad_vals = gradient_attrib.attribute(test_input, neuron)
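
Why the test now passes `grad_enabled=True`: Saliency differentiates the value returned by the wrapped forward, so the layer eval inside the lambda must run with autograd enabled; with the new default of `grad_enabled=False` it would return outputs that carry no graph. A plain-PyTorch sketch of that constraint (the model and shapes are made up):

```python
import torch

model = torch.nn.Sequential(torch.nn.Linear(3, 4), torch.nn.ReLU())
x = torch.rand(2, 3, requires_grad=True)

with torch.autograd.set_grad_enabled(False):
    out_detached = model(x)                # no grad_fn: nothing to differentiate

with torch.autograd.set_grad_enabled(True):
    out = model(x)[:, 0].sum()
    (grad,) = torch.autograd.grad(out, x)  # works: the graph was recorded
```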
