remove the AdamOptimizer, SGDOptimizer, MomentumOptimizer, ModelAverage, LookaheadOptimizer, FtrlOptimizer, DecayedAdagradOptimizer, and DpsgdOptimizer in fluid; relocate the ExponentialMovingAverage, PipelineOptimizer, and GradientMergeOptimizer; and change the optimizer base for LarsMomentumOptimizer and RecomputeOptimizer (#55970)

* change the optimizer base for SGDOptimizer

* change the optimizer base for SGDOptimizer

* replace the SGDOptimizer with SGD

* fix bug of sgd

* change the optimizer base for MomentumOptimizer

* fix the remaining tests

* remove the Momentum in fluid/optimizer.py

* fix bug

* fix bug

* fix bug

* fix bug

* Update test_resnet_cinn.py

* Update test_resnet_prim_cinn.py

* fix bug

* fix bug

* fix bug

* remove the ModelAverage in fluid

* remove the LookaheadOptimizer in fluid

* fix bug

* remove AdamOptimizer in fluid

* Update test_image_classification_fp16.py

* fix bug

* relocate the ExponentialMovingAverage in fluid

* restore the static api

* remove the FtrlOptimizer in fluid

* remove the DecayedAdagradOptimizer in fluid

* remove the DpsgdOptimizer in fluid

* fix bug

* fix codestyle

* fix bug

* fix bug

* relocate the PipelineOptimizer

* relocate the GradientMergeOptimizer

* fix bug

* fix bug

* fix bug

* fix doc

* Update __init__.py

* Update test_fleet_qat_meta_optimizer.py

* change optimizer base for LarsMomentumOptimizer

* fix bug

* fix conflict

* fix code-style

* fix sample codes

* fix bug

* fix bug

* fix cinn bug

* fix bug

* fix bug

* Update qat_optimizer.py

* Update __init__.py

* fix bug

* change optimizer base for RecomputeOptimizer

* fix bug

* fix bug

* Update test_imperative_optimizer_v2.py
longranger2 committed Aug 9, 2023
1 parent 9ec0bdd commit 723c6f7
Showing 299 changed files with 4,399 additions and 7,465 deletions.
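To make the migration concrete, here is a minimal before/after sketch in dygraph mode. The `Linear` layer, shapes, and learning rates are illustrative assumptions; the `parameter_list` → `parameters` rename and the `SGDOptimizer`/`AdamOptimizer` → `SGD`/`Adam` replacements are what the diffs below apply.

```python
import paddle

# A tiny layer purely for illustration (not part of this commit).
linear = paddle.nn.Linear(10, 1)

# Before this commit (classes removed from fluid), the pattern was roughly:
#   import paddle.fluid as fluid
#   opt = fluid.optimizer.SGDOptimizer(
#       learning_rate=0.01, parameter_list=linear.parameters())

# After this commit, the paddle.optimizer classes are used instead:
sgd = paddle.optimizer.SGD(learning_rate=0.01, parameters=linear.parameters())
adam = paddle.optimizer.Adam(learning_rate=0.001, parameters=linear.parameters())

# The usual dygraph training step is unchanged.
loss = paddle.mean(linear(paddle.randn([4, 10])))
loss.backward()
sgd.step()
sgd.clear_grad()
```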
12 changes: 6 additions & 6 deletions python/paddle/amp/grad_scaler.py
@@ -75,8 +75,8 @@ class AmpScaler:
data = np.random.uniform(-1, 1, [10, 3, 32, 32]).astype('float32')
model = paddle.nn.Conv2D(3, 2, 3)
- optimizer = paddle.optimizer.SGDOptimizer(
-     learning_rate=0.01, parameter_list=model.parameters())
+ optimizer = paddle.optimizer.SGD(
+     learning_rate=0.01, parameters=model.parameters())
scaler = paddle.amp.AmpScaler(init_loss_scaling=1024)
data = paddle.to_tensor(data)
with paddle.amp.amp_guard():
@@ -168,8 +168,8 @@ def scale(self, var):
data = np.random.uniform(-1, 1, [10, 3, 32, 32]).astype('float32')
model = paddle.nn.Conv2D(3, 2, 3)
- optimizer = paddle.optimizer.SGDOptimizer(
-     learning_rate=0.01, parameter_list=model.parameters())
+ optimizer = paddle.optimizer.SGD(
+     learning_rate=0.01, parameters=model.parameters())
scaler = paddle.amp.AmpScaler(init_loss_scaling=1024)
data = paddle.to_tensor(data)
with paddle.amp.amp_guard():
@@ -221,8 +221,8 @@ def minimize(self, optimizer, *args, **kwargs):
data = np.random.uniform(-1, 1, [10, 3, 32, 32]).astype('float32')
model = paddle.nn.Conv2D(3, 2, 3)
- optimizer = paddle.optimizer.SGDOptimizer(
-     learning_rate=0.01, parameter_list=model.parameters())
+ optimizer = paddle.optimizer.SGD(
+     learning_rate=0.01, parameters=model.parameters())
scaler = paddle.amp.AmpScaler(init_loss_scaling=1024)
data = paddle.to_tensor(data)
with paddle.amp.amp_guard():
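For reference, a runnable version of the updated `AmpScaler` docstring might look like the sketch below; the loss computation and the `scale`/`minimize` calls are assumed to follow the standard pattern, since the hunks above only show the optimizer lines and their immediate context.

```python
import numpy as np
import paddle

data = np.random.uniform(-1, 1, [10, 3, 32, 32]).astype('float32')
model = paddle.nn.Conv2D(3, 2, 3)
# The updated docstring constructs paddle.optimizer.SGD with `parameters`
# instead of the removed SGDOptimizer form with `parameter_list`.
optimizer = paddle.optimizer.SGD(
    learning_rate=0.01, parameters=model.parameters())
scaler = paddle.amp.AmpScaler(init_loss_scaling=1024)
data = paddle.to_tensor(data)
with paddle.amp.amp_guard():
    conv = model(data)
    loss = paddle.mean(conv)
    scaled = scaler.scale(loss)          # scale the loss before backward
    scaled.backward()                    # backpropagate on the scaled loss
    scaler.minimize(optimizer, scaled)   # unscale, step, and update the scaling
```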
@@ -21,9 +21,10 @@
import paddle
from paddle.common_ops_import import LayerHelper
from paddle.fluid.dygraph import base as imperative_base
- from paddle.fluid.optimizer import Momentum, Optimizer
+ from paddle.fluid.optimizer import Optimizer
from paddle.framework import core, in_dynamic_mode
from paddle.nn.clip import ClipGradByNorm, append_gradient_clip_ops
+ from paddle.optimizer import Momentum
from paddle.regularizer import L1Decay, L2Decay
from paddle.static import create_global_var

@@ -152,6 +152,6 @@ def fp16_compression(param_and_grads):

def apply_optimize(self, loss, startup_program, params_grads):
new_params_grads = self.fp16_compression(params_grads)
- return self.inner_opt.apply_optimize(
+ return self.inner_opt._apply_optimize(
loss, startup_program=startup_program, params_grads=new_params_grads
)
@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and

- from paddle.fluid.optimizer import GradientMergeOptimizer as GM
+ from paddle.incubate.optimizer import GradientMergeOptimizer as GM

from .meta_optimizer_base import MetaOptimizerBase

@@ -14,7 +14,7 @@
import logging

import paddle
- from paddle.fluid.optimizer import AdamOptimizer
+ from paddle.optimizer import Adam

from .meta_optimizer_base import MetaOptimizerBase

@@ -38,7 +38,7 @@ def _set_basic_info(
)

opt = self.inner_opt
- if not isinstance(opt, AdamOptimizer):
+ if not isinstance(opt, Adam):
return

configs = self.user_defined_strategy.lamb_configs
@@ -72,7 +72,7 @@ def _can_apply(self):
return False

if self.user_defined_strategy.lamb:
- if not isinstance(self.inner_opt, AdamOptimizer):
+ if not isinstance(self.inner_opt, Adam):
logging.warn(
"lamb need the inner optimizer to be AdamOptimizer optimizer but got {}.".format(
self.inner_opt.type
@@ -13,7 +13,8 @@

import logging

- from paddle.fluid.optimizer import LarsMomentumOptimizer, Momentum
+ from paddle.incubate.optimizer import LarsMomentumOptimizer
+ from paddle.optimizer import Momentum

from .meta_optimizer_base import MetaOptimizerBase

@@ -98,7 +99,7 @@ def apply_gradients(self, params_grads):
return self.lars_opt.apply_gradients(params_grads=params_grads)

def apply_optimize(self, loss, startup_program, params_grads):
- return self.lars_opt.apply_optimize(
+ return self.lars_opt._apply_optimize(
loss, startup_program=startup_program, params_grads=params_grads
)

@@ -49,9 +49,7 @@ def _can_apply(self):
self.inner_opt,
(
paddle.optimizer.momentum.Momentum,
- paddle.fluid.optimizer.Momentum,
paddle.optimizer.sgd.SGD,
- paddle.fluid.optimizer.SGD,
),
)

@@ -235,9 +233,7 @@ def _can_apply(self):
self.inner_opt,
(
paddle.optimizer.Momentum,
- paddle.fluid.optimizer.Momentum,
paddle.optimizer.sgd.SGD,
- paddle.fluid.optimizer.SGD,
),
)

@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

- from paddle.fluid.optimizer import Optimizer
+ from paddle.optimizer import Optimizer

__all__ = []

@@ -81,7 +81,7 @@ def backward(
)

def apply_optimize(self, loss, startup_program, params_grads):
- return self.inner_opt.apply_optimize(
+ return self.inner_opt._apply_optimize(
loss, startup_program=startup_program, params_grads=params_grads
)

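Several of the meta-optimizer hunks in this commit make the same one-line change: with `paddle.optimizer.Optimizer` as the inner optimizer's base, the optimize hook is the private `_apply_optimize` method rather than `apply_optimize`. A hypothetical wrapper (the class name and constructor are invented for illustration) showing that forwarding pattern:

```python
class ExampleMetaOptimizer:
    """Hypothetical wrapper mirroring the meta-optimizer changes above."""

    def __init__(self, inner_opt):
        # inner_opt is expected to be a paddle.optimizer.Optimizer instance.
        self.inner_opt = inner_opt

    def apply_optimize(self, loss, startup_program, params_grads):
        # The paddle.optimizer.Optimizer base exposes this hook as the
        # private _apply_optimize, so wrappers forward to it.
        return self.inner_opt._apply_optimize(
            loss, startup_program=startup_program, params_grads=params_grads
        )
```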
@@ -17,7 +17,6 @@
import subprocess

import paddle
- from paddle import fluid
from paddle.framework import core

from ..base.private_helper_function import wait_server_ready
@@ -293,7 +292,7 @@ def get_sys_free_mem():
% (platform.system())
)

- if not isinstance(self.inner_opt, fluid.optimizer.SGDOptimizer):
+ if not isinstance(self.inner_opt, paddle.optimizer.SGD):
return False

free = get_sys_free_mem()
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and

import paddle
- from paddle.fluid.optimizer import PipelineOptimizer as PO
+ from paddle.incubate.optimizer import PipelineOptimizer as PO

from .common import (
OP_ROLE_KEY,
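The relocated classes now come from `paddle.incubate.optimizer`, as the import hunks in this commit show. A minimal import sketch follows; only classes whose new path appears in these diffs are listed (`ExponentialMovingAverage` is also relocated per the commit title, but its new path is not visible in the hunks shown here).

```python
# New home of the relocated optimizer wrappers after this commit.
from paddle.incubate.optimizer import (
    GradientMergeOptimizer,
    LarsMomentumOptimizer,
    PipelineOptimizer,
    RecomputeOptimizer,
)
```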
@@ -204,7 +204,7 @@ def get_sys_free_mem():
% (platform.system())
)

- if not isinstance(self.inner_opt, paddle.fluid.optimizer.SGDOptimizer):
+ if not isinstance(self.inner_opt, paddle.optimizer.SGD):
return False

free = get_sys_free_mem()
@@ -96,9 +96,9 @@ def minimize_impl(
):
optimize_ops, params_grads = self.inner_opt.minimize(
loss,
- startup_program=startup_program,
- parameter_list=parameter_list,
- no_grad_set=no_grad_set,
+ startup_program,
+ parameter_list,
+ no_grad_set,
)
device = paddle.device.get_device()
place = paddle.set_device(device)
@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and

- from paddle.fluid.optimizer import RecomputeOptimizer as RO
+ from paddle.incubate.optimizer import RecomputeOptimizer as RO

from .meta_optimizer_base import MetaOptimizerBase

@@ -15,7 +15,7 @@
import os

from paddle.fluid import core
- from paddle.fluid.optimizer import PipelineOptimizer
+ from paddle.incubate.optimizer import PipelineOptimizer
from paddle.static import (
create_global_var,
default_startup_program,
2 changes: 1 addition & 1 deletion python/paddle/distributed/passes/auto_parallel_fp16.py
@@ -917,7 +917,7 @@ def _apply_single_impl(self, main_program, startup_program, context):

if self.target_dtype == "fp16":
if isinstance(
- base_opt, (paddle.static.Adam, paddle.optimizer.AdamW)
+ base_opt, (paddle.optimizer.Adam, paddle.optimizer.AdamW)
):
with main_program._optimized_guard([]):
# found_inf = paddle.tensor.creation._memcpy(
@@ -295,7 +295,7 @@ class DistributeTranspiler:
cost =paddle.nn.functional.square_error_cost(input=y_predict, label=y)
avg_loss = paddle.mean(cost)
- sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
+ sgd_optimizer = paddle.optimizer.SGD(learning_rate=0.001)
sgd_optimizer.minimize(avg_loss)
# for pserver mode
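For the static-graph docstring updated just above, a self-contained sketch of the same pattern; the network definition and feed shapes are invented for illustration, while the `paddle.optimizer.SGD(learning_rate=0.001)` construction and `minimize` call match the diff.

```python
import paddle

paddle.enable_static()

main_prog = paddle.static.Program()
startup_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog, startup_prog):
    # Toy regression network; shapes are illustrative only.
    x = paddle.static.data(name='x', shape=[None, 13], dtype='float32')
    y = paddle.static.data(name='y', shape=[None, 1], dtype='float32')
    y_predict = paddle.static.nn.fc(x, size=1)
    cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
    avg_loss = paddle.mean(cost)

    # paddle.optimizer.SGD replaces the old fluid.optimizer.SGD here.
    sgd_optimizer = paddle.optimizer.SGD(learning_rate=0.001)
    sgd_optimizer.minimize(avg_loss)
```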
