
Commit 5ffa617
Merge branch 'PaddlePaddle:develop' into C18
Liyulingyue authored Feb 29, 2024
2 parents 981a916 + 7d84d55
Showing 31 changed files with 1,906 additions and 613 deletions.
paddle/fluid/framework/ir/CMakeLists.txt (2 additions & 0 deletions)

@@ -322,6 +322,8 @@ if(WITH_XPU)
               ${XPU_PASS_DEPS})
  pass_library(sine_pos_fuse_pass inference DIR xpu DEPS ${XPU_PASS_DEPS})
  pass_library(quant_dequant_xpu_pass inference DIR xpu DEPS ${XPU_PASS_DEPS})
+  pass_library(roformer_relative_pos_fuse_pass inference DIR xpu DEPS
+               ${XPU_PASS_DEPS})
endif()

cc_library(
paddle/fluid/framework/ir/constant_folding_pass.cc (39 additions & 3 deletions)

@@ -13,18 +13,20 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/ir/constant_folding_pass.h"

+#include <string>
+#include <vector>
+#include "glog/logging.h"

#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/enforce.h"

#include "paddle/fluid/framework/convert_utils.h"

namespace paddle {
namespace framework {
namespace ir {
@@ -51,6 +53,37 @@ struct ConstantFolding : public PatternBase {
};
} // namespace patterns

+namespace {
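+// Gather the names of every variable that appears as an input or output of a
+// control-flow op ("while" / "conditional_block"). These variables are shared
+// with sub-block programs, so folding them away is not safe.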
+std::unordered_set<std::string> GetControlFlowVarNames(ir::Graph *graph) {
+  std::unordered_set<std::string> control_flow_ops{"while",
+                                                   "conditional_block"};
+  std::unordered_set<std::string> control_flow_var_names;
+  for (auto *node : graph->Nodes()) {
+    if (!node->IsOp() || control_flow_ops.count(node->Op()->Type()) == 0)
+      continue;
+    for (auto const &in_names : node->Op()->Inputs()) {
+      auto var_names = in_names.second;
+      control_flow_var_names.insert(var_names.begin(), var_names.end());
+    }
+    for (auto const &out_names : node->Op()->Outputs()) {
+      auto var_names = out_names.second;
+      control_flow_var_names.insert(var_names.begin(), var_names.end());
+    }
+  }
+  return control_flow_var_names;
+}
+
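+// True when one of `node`'s output variables is also used by a control-flow
+// op, in which case folding `node` could break the sub-block program.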
+bool OutputUsedByControlFlow(ir::Node *node,
+                             const std::unordered_set<std::string> &cf_vars) {
+  for (auto out_node : node->outputs) {
+    if (cf_vars.count(out_node->Name())) {
+      return true;
+    }
+  }
+  return false;
+}
+}  // namespace

ConstantFoldingPass::ConstantFoldingPass() = default;

void ConstantFoldingPass::ApplyImpl(ir::Graph *graph) const {
@@ -69,6 +102,7 @@ void ConstantFoldingPass::ApplyImpl(ir::Graph *graph) const {
"save",
"quantize_linear",
"dequantize_linear"};
+  const auto cf_vars = GetControlFlowVarNames(graph);
int folded_op_num = 0;

auto op_node_sorted = framework::ir::TopologyVariantSort(
@@ -78,7 +112,9 @@ void ConstantFoldingPass::ApplyImpl(ir::Graph *graph) const {
if (std::find(blacklist.begin(), blacklist.end(), op_node->Name()) !=
blacklist.end())
continue;
-
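+    // Do not fold ops whose outputs feed a while/conditional_block region.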
+    if (OutputUsedByControlFlow(op_node, cf_vars)) {
+      continue;
+    }
bool input_persis = true;
    // map is used to record how many times a name string occurs in the whole
// graph's nodes
paddle/fluid/framework/ir/xpu/multi_encoder_xpu_adaptive_seqlen_fuse_pass.cc

@@ -25,7 +25,9 @@ namespace ir {
namespace patterns {

struct AdaptiveSeqlenPatternV1 : public PatternBase {
-  AdaptiveSeqlenPatternV1(PDPattern* pattern, const std::string& name_scope);
+  AdaptiveSeqlenPatternV1(PDPattern* pattern,
+                          const std::string& name_scope,
+                          const std::string& matmul_type);

// declare operator node's name
PATTERN_DECL_NODE(embedding_xpu);
@@ -44,7 +46,8 @@ struct AdaptiveSeqlenPatternV1 : public PatternBase {
};

AdaptiveSeqlenPatternV1::AdaptiveSeqlenPatternV1(PDPattern* pattern,
-                                                 const std::string& name_scope)
+                                                 const std::string& name_scope,
+                                                 const std::string& matmul_type)
: PatternBase(pattern, name_scope, name_scope) {
auto* embedding_xpu = pattern->NewNode(embedding_xpu_repr())
->assert_is_op("embedding_with_eltwise_add_xpu");
@@ -59,11 +62,11 @@ AdaptiveSeqlenPatternV1::AdaptiveSeqlenPatternV1(PDPattern* pattern,
->assert_is_op_input("multi_encoder_xpu", "x");

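  // matmul_type parameterizes the mask matmul: "matmul" or "matmul_v2".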
auto* mask = pattern->NewNode(mask_repr())
->assert_is_op_input("matmul", "X")
->assert_is_op_input("matmul", "Y");
auto* matmul = pattern->NewNode(matmul_repr())->assert_is_op("matmul");
->assert_is_op_input(matmul_type, "X")
->assert_is_op_input(matmul_type, "Y");
auto* matmul = pattern->NewNode(matmul_repr())->assert_is_op(matmul_type);
auto* matmul_out = pattern->NewNode(matmul_out_repr())
->assert_is_op_output("matmul", "Out")
->assert_is_op_output(matmul_type, "Out")
->assert_is_op_input("scale", "X");
auto* scale = pattern->NewNode(scale_repr())->assert_is_op("scale");
auto* scale_out = pattern->NewNode(scale_out_repr())
@@ -88,9 +91,10 @@ AdaptiveSeqlenPatternV1::AdaptiveSeqlenPatternV1(PDPattern* pattern,
} // namespace patterns

int MultiEncoderXPUAdaptiveSeqlenFusePass::ApplyAdaptiveSeqlenPassV1(
-    ir::Graph* graph) const {
+    ir::Graph* graph, const std::string& matmul_type) const {
GraphPatternDetector gpd;
-  patterns::AdaptiveSeqlenPatternV1 pattern(gpd.mutable_pattern(), name_scope_);
+  patterns::AdaptiveSeqlenPatternV1 pattern(
+      gpd.mutable_pattern(), name_scope_, matmul_type);

int found_subgraph_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
@@ -143,7 +147,9 @@ int MultiEncoderXPUAdaptiveSeqlenFusePass::ApplyAdaptiveSeqlenPassV1(
namespace patterns {

struct AdaptiveSeqlenPatternV2 : public PatternBase {
-  AdaptiveSeqlenPatternV2(PDPattern* pattern, const std::string& name_scope);
+  AdaptiveSeqlenPatternV2(PDPattern* pattern,
+                          const std::string& name_scope,
+                          const std::string& matmul_type);

// declare operator node's name
PATTERN_DECL_NODE(embedding_xpu);
@@ -172,7 +178,8 @@ struct AdaptiveSeqlenPatternV2 : public PatternBase {
};

AdaptiveSeqlenPatternV2::AdaptiveSeqlenPatternV2(PDPattern* pattern,
-                                                 const std::string& name_scope)
+                                                 const std::string& name_scope,
+                                                 const std::string& matmul_type)
: PatternBase(pattern, name_scope, name_scope) {
auto* embedding_xpu = pattern->NewNode(embedding_xpu_repr())
->assert_is_op("embedding_with_eltwise_add_xpu");
@@ -201,11 +208,11 @@ AdaptiveSeqlenPatternV2::AdaptiveSeqlenPatternV2(PDPattern* pattern,
pattern->NewNode(unsqueeze_0_repr())->assert_is_op("unsqueeze2");
auto* unsqueeze_0_out = pattern->NewNode(unsqueeze_0_out_repr())
->assert_is_op_output("unsqueeze2", "Out")
->assert_is_op_input("matmul_v2", "X")
->assert_is_op_input("matmul_v2", "Y");
auto* matmul = pattern->NewNode(matmul_repr())->assert_is_op("matmul_v2");
->assert_is_op_input(matmul_type, "X")
->assert_is_op_input(matmul_type, "Y");
auto* matmul = pattern->NewNode(matmul_repr())->assert_is_op(matmul_type);
auto* matmul_out = pattern->NewNode(matmul_out_repr())
->assert_is_op_output("matmul_v2", "Out")
->assert_is_op_output(matmul_type, "Out")
->assert_is_op_input("scale", "X");
auto* scale_0 = pattern->NewNode(scale_0_repr())->assert_is_op("scale");
auto* scale_0_out = pattern->NewNode(scale_0_out_repr())
@@ -244,9 +251,10 @@ AdaptiveSeqlenPatternV2::AdaptiveSeqlenPatternV2(PDPattern* pattern,
} // namespace patterns

int MultiEncoderXPUAdaptiveSeqlenFusePass::ApplyAdaptiveSeqlenPassV2(
-    ir::Graph* graph) const {
+    ir::Graph* graph, const std::string& matmul_type) const {
GraphPatternDetector gpd;
-  patterns::AdaptiveSeqlenPatternV2 pattern(gpd.mutable_pattern(), name_scope_);
+  patterns::AdaptiveSeqlenPatternV2 pattern(
+      gpd.mutable_pattern(), name_scope_, matmul_type);

int found_subgraph_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
@@ -324,9 +332,13 @@ void MultiEncoderXPUAdaptiveSeqlenFusePass::ApplyImpl(ir::Graph* graph) const {
PADDLE_ENFORCE_NOT_NULL(
graph, platform::errors::PreconditionNotMet("graph should not be null."));
Init(name_scope_, graph);
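+  // Run both pattern versions for each matmul flavour, since exported graphs
+  // may contain either "matmul" or "matmul_v2" for the mask computation.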
+  std::vector<std::string> matmul_types{"matmul", "matmul_v2"};
+  int found_subgraph_count = 0;
+  for (auto& matmul_type : matmul_types) {
+    found_subgraph_count += ApplyAdaptiveSeqlenPassV1(graph, matmul_type);
+    found_subgraph_count += ApplyAdaptiveSeqlenPassV2(graph, matmul_type);
+  }

-  int found_subgraph_count = ApplyAdaptiveSeqlenPassV1(graph);
-  found_subgraph_count += ApplyAdaptiveSeqlenPassV2(graph);
AddStatis(found_subgraph_count);
}

paddle/fluid/framework/ir/xpu/multi_encoder_xpu_adaptive_seqlen_fuse_pass.h

@@ -76,7 +76,8 @@ class MultiEncoderXPUAdaptiveSeqlenFusePass : public FusePassBase {
|
out_var*
*/
-  int ApplyAdaptiveSeqlenPassV1(ir::Graph* graph) const;
+  int ApplyAdaptiveSeqlenPassV1(ir::Graph* graph,
+                                const std::string& matmul_type) const;

/*
adaptive seqlen V2, before:
@@ -132,7 +133,8 @@ class MultiEncoderXPUAdaptiveSeqlenFusePass : public FusePassBase {
|
out_var*
*/
-  int ApplyAdaptiveSeqlenPassV2(ir::Graph* graph) const;
+  int ApplyAdaptiveSeqlenPassV2(ir::Graph* graph,
+                                const std::string& matmul_type) const;

private:
const std::string name_scope_{"multi_encoder_xpu_adaptive_seqlen_fuse_pass"};
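For orientation, a minimal usage sketch (not part of this commit) of how the two passes touched here could be exercised through the Paddle Inference C++ API. The "model_dir" path is a placeholder, and on XPU builds these passes normally run as part of the default pass pipeline, so the explicit AppendPass calls are illustrative only.

#include <memory>

#include "paddle_inference_api.h"

int main() {
  // Placeholder model directory; any saved inference model would do.
  paddle_infer::Config config;
  config.SetModel("model_dir");
  // Select the XPU pipeline; this is what makes the ir/xpu fuse passes apply.
  config.EnableXpu();
  // Both passes are registered under the names used in this commit.
  config.pass_builder()->AppendPass("constant_folding_pass");
  config.pass_builder()->AppendPass("multi_encoder_xpu_adaptive_seqlen_fuse_pass");
  auto predictor = paddle_infer::CreatePredictor(config);
  return predictor ? 0 : 1;
}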