diff --git a/src/sparsezoo/analyze_v2/memory_access_analysis.py b/src/sparsezoo/analyze_v2/memory_access_analysis.py index 698ae540a..fd976ba3f 100644 --- a/src/sparsezoo/analyze_v2/memory_access_analysis.py +++ b/src/sparsezoo/analyze_v2/memory_access_analysis.py @@ -73,7 +73,7 @@ def get_quantization(self) -> List["QuantizationAnalysisSchema"]: :returns: List of quantization analysis pydantic models for each grouping if the node has weights """ - data = get_memeory_access_bits(self.model_graph, self.node, self.node_shape) + data = get_memory_access_bits(self.model_graph, self.node, self.node_shape) if data is not None: quantization_analysis_model = [] for grouping, counts_dict in data.items(): @@ -152,7 +152,7 @@ def get_memory_access_counts( } -def get_memeory_access_bits( +def get_memory_access_bits( model_graph: ONNXGraph, node: NodeProto, node_shape: Dict, @@ -164,12 +164,15 @@ def get_memeory_access_bits( ) node_weight = get_node_weight(model_graph, node) precision = get_numpy_quantization_level(node_weight) - bits = memory_access_counts["single"]["counts"] * precision - bits_quant = bits * is_quantized_layer(model_graph, node) + counts = memory_access_counts["single"]["counts"] + bits = counts * precision + is_quantized = is_quantized_layer(model_graph, node) return { "tensor": { "bits": bits, - "bits_quant": bits_quant, + "bits_quant": bits * is_quantized, + "counts": counts, + "counts_quant": counts * is_quantized, } } diff --git a/src/sparsezoo/analyze_v2/model_analysis.py b/src/sparsezoo/analyze_v2/model_analysis.py index 39420b12a..c165f7de9 100644 --- a/src/sparsezoo/analyze_v2/model_analysis.py +++ b/src/sparsezoo/analyze_v2/model_analysis.py @@ -78,10 +78,10 @@ def calculate_sparsity_percentage(self, category: Dict): counts = category["counts"] return (counts_sparse / counts) * 100 if counts != 0 else 0 - def calculate_quantized_percentage(self, tensor: Dict): - bits_quant = tensor["bits_quant"] - bits = tensor["bits"] - return (bits_quant / bits) * 100 if bits != 0 else 0 + def calculate_quantized_percentage(self, tensor: Dict, counts_prefix: str): + counts_quant = tensor[f"{counts_prefix}_quant"] + counts = tensor[counts_prefix] + return (counts_quant / counts) * 100 if counts != 0 else 0 def __repr__(self): data = self.to_dict() @@ -93,7 +93,7 @@ def __repr__(self): ) param_size = summaries["params"]["quantization"]["tensor"]["bits"] param_quantized = self.calculate_quantized_percentage( - summaries["params"]["quantization"]["tensor"] + summaries["params"]["quantization"]["tensor"], "counts" ) ops_total = summaries["ops"]["sparsity"]["single"]["counts"] @@ -102,7 +102,7 @@ def __repr__(self): ) ops_size = summaries["ops"]["quantization"]["tensor"]["bits"] ops_quantized = self.calculate_quantized_percentage( - summaries["ops"]["quantization"]["tensor"] + summaries["ops"]["quantization"]["tensor"], "counts" ) mem_access_total = summaries["mem_access"]["sparsity"]["single"]["counts"] @@ -111,7 +111,7 @@ def __repr__(self): ) mem_access_size = summaries["mem_access"]["quantization"]["tensor"]["bits"] mem_access_quantized = self.calculate_quantized_percentage( - summaries["mem_access"]["quantization"]["tensor"] + summaries["mem_access"]["quantization"]["tensor"], "counts" ) return ( diff --git a/src/sparsezoo/analyze_v2/operation_analysis.py b/src/sparsezoo/analyze_v2/operation_analysis.py index baf40eece..d6448d82c 100644 --- a/src/sparsezoo/analyze_v2/operation_analysis.py +++ b/src/sparsezoo/analyze_v2/operation_analysis.py @@ -166,22 +166,23 @@ def get_operation_bits( precision = get_numpy_quantization_level(node_weight) is_quantized_op = "32" not in str(precision) - bits = ( - ops["single"]["counts"] + ops["single"]["counts_sparse"] - ) * precision - - bits_block4 = ( - ops["block4"]["counts"] + ops["block4"]["counts_sparse"] - ) * precision - - bits_quant = is_quantized_op * bits + single_counts = ops["single"]["counts"] + single_counts_sparse = ops["single"]["counts_sparse"] + single_bits = (single_counts - single_counts_sparse) * precision + block4_counts = ops["block4"]["counts"] + block4_counts_sparse = ops["block4"]["counts_sparse"] + block4_bits = (block4_counts - block4_counts_sparse) * precision return { "tensor": { - "bits": bits, - "bits_quant": bits_quant, + "counts": single_counts, + "counts_quant": is_quantized_op * single_counts, + "bits": single_bits, + "bits_quant": is_quantized_op * single_bits, }, "block4": { - "bits": bits_block4, - "bits_quant": bits_quant, + "counts": block4_counts, + "counts_quant": is_quantized_op * block4_counts, + "bits": block4_bits, + "bits_quant": is_quantized_op * block4_bits, }, } diff --git a/src/sparsezoo/analyze_v2/parameter_analysis.py b/src/sparsezoo/analyze_v2/parameter_analysis.py index 90a913435..4583d686b 100644 --- a/src/sparsezoo/analyze_v2/parameter_analysis.py +++ b/src/sparsezoo/analyze_v2/parameter_analysis.py @@ -29,7 +29,7 @@ get_node_num_four_block_zeros_and_size, get_node_param_counts, get_node_weight, - get_node_weight_bits, + get_node_weight_precision, get_numpy_distribution_statistics, get_numpy_entropy, get_numpy_modes, @@ -153,14 +153,17 @@ def get_parameter_bits( If the layer is quantized, assume all its elements in the ndarray are quantized """ - node_weight = get_node_weight(model_graph, node) - if node_weight is not None and node_weight.size > 0: - bits = get_node_weight_bits(model_graph, node) - + num_weights, num_bias, num_sparse_weights = get_node_param_counts(node, model_graph) + if num_weights > 0: + precision = get_node_weight_precision(model_graph, node) + is_quantized = is_quantized_layer(model_graph, node) + num_non_sparse_weights = num_weights - num_sparse_weights + num_bias return { "tensor": { - "bits": bits, - "bits_quant": bits * is_quantized_layer(model_graph, node), + "counts": num_weights, + "counts_quant": num_weights * is_quantized, + "bits": num_non_sparse_weights * precision, + "bits_quant": num_non_sparse_weights * precision * is_quantized, }, } diff --git a/src/sparsezoo/analyze_v2/schemas/quantization_analysis.py b/src/sparsezoo/analyze_v2/schemas/quantization_analysis.py index e7244a772..b0a43120e 100644 --- a/src/sparsezoo/analyze_v2/schemas/quantization_analysis.py +++ b/src/sparsezoo/analyze_v2/schemas/quantization_analysis.py @@ -20,6 +20,14 @@ class QuantizationSummaryAnalysisSchema(BaseModel): + counts: float = Field(..., description="Total number of weights") + counts_quant: int = Field( + ..., + description=( + "Total number of quantized weights." + "Here we assume if the layer is quantized, the entire array is quantized" + ), + ) bits: float = Field(..., description="Total bits required to store the weights") bits_quant: int = Field( ..., @@ -39,9 +47,9 @@ def validate_types(cls, value): @validator("percent", pre=True, always=True) def calculate_percent_if_none(cls, value, values): if value is None: - bits = values.get("bits", 0) - bits_quant = values.get("bits_quant", 0) - return bits_quant / bits if bits > 0 else 0.0 + counts = values.get("counts", 0) + counts_quant = values.get("counts_quant", 0) + return counts_quant / counts if counts > 0 else 0.0 return value def __add__(self, model: BaseModel): @@ -51,7 +59,9 @@ def __add__(self, model: BaseModel): if validator_model is not None: return validator_model( + counts=self.counts + model.counts, bits=self.bits + model.bits, + counts_quant=self.counts_quant + model.counts_quant, bits_quant=self.bits_quant + model.bits_quant, ) @@ -67,6 +77,8 @@ def __add__(self, model: BaseModel): if validator_model is not None and self.grouping == model.grouping: return validator_model( grouping=self.grouping, + counts=self.counts + model.counts, bits=self.bits + model.bits, + counts_quant=self.counts_quant + model.counts_quant, bits_quant=self.bits_quant + model.bits_quant, ) diff --git a/src/sparsezoo/utils/onnx/analysis.py b/src/sparsezoo/utils/onnx/analysis.py index 9ab579b7d..35c1d56f5 100644 --- a/src/sparsezoo/utils/onnx/analysis.py +++ b/src/sparsezoo/utils/onnx/analysis.py @@ -48,7 +48,7 @@ "get_numpy_distribution_statistics", "get_numpy_quantization_level", "get_numpy_bits", - "get_node_weight_bits", + "get_node_weight_precision", "get_node_param_counts", "get_node_kernel_shape", ] @@ -485,13 +485,13 @@ def get_node_param_counts( return params, bias, sparse_params -def get_node_weight_bits( +def get_node_weight_precision( model_graph: ONNXGraph, node: NodeProto, ) -> int: - """Get the bits needed to store the node weights""" + """Get the precision of the node in number of bits""" node_weight = get_node_weight(model_graph, node) - return get_numpy_bits(node_weight) + return get_numpy_quantization_level(node_weight) def get_numpy_bits(arr: numpy.ndarray) -> int: