Examples of Specific Methods
Bi-Level Optimization
Before setting up a problem solver with BOHML, the model and the objective functions need to be defined in advance. Here we take few-shot learning as an example.
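The snippets in this section assume the following imports. This is a minimal sketch; the bohml submodule paths are inferred from the calls used below, and depending on the package layout the submodules may need to be imported explicitly (e.g. import bohml.optimizer).
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm

import bohml  # assumed to expose bohml.optimizer and bohml.utils.model as used below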
Model and optimizer:
ul_model = bohml.utils.model.backbone.Res12([5, 3, 84, 84], ways, use_head=False)
ll_model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(np.prod(ul_model.output_shape[1:]), ways)
)
ll_opt = torch.optim.SGD(ll_model.parameters(), lr=args.learning_rate)
ul_opt = torch.optim.Adam(ul_model.parameters())
LL and UL objectives:
def ll_o(data, target, ul_model, ll_model):
    out_f = ll_model(ul_model(data))
    loss_f = F.cross_entropy(out_f, target)
    return loss_f

def ul_o(data, target, ul_model, ll_model):
    out_F = ll_model(ul_model(data))
    loss_F = F.cross_entropy(out_F, target)
    return loss_F
Then we can choose the desired method and set its corresponding parameters.
RAD
optimizer = bohml.optimizer.BOHMLOptimizer(
    "Feature", ll_method="Dynamic", ul_method="Recurrence",
    ll_objective=ll_o, ul_objective=ul_o,
    ll_model=ll_model, ul_model=ul_model)
optimizer.build_ll_solver(lower_loop, ll_opt)
optimizer.build_ul_solver(ul_opt)
TRAD
optimizer = bohml.optimizer.BOHMLOptimizer(
    "Feature", ll_method="Dynamic", ul_method="Recurrence",
    ll_objective=ll_o, ul_objective=ul_o,
    ll_model=ll_model, ul_model=ul_model)
optimizer.build_ll_solver(lower_loop, ll_opt, truncate_iter=2)
optimizer.build_ul_solver(ul_opt)
OneStage-RAD
optimizer = bohml.optimizer.BOHMLOptimizer(
    "Feature", ll_method="Dynamic", ul_method="Onestage",
    ll_objective=ll_o, ul_objective=ul_o,
    ll_model=ll_model, ul_model=ul_model)
optimizer.build_ll_solver(1, ll_opt)  # one-stage: a single lower-level step
optimizer.build_ul_solver(ul_opt)
LS
optimizer = bohml.optimizer.BOHMLOptimizer(
    "Feature", ll_method="Implicit", ul_method="LS",
    ll_objective=ll_o, ul_objective=ul_o,
    ll_model=ll_model, ul_model=ul_model)
optimizer.build_ll_solver(lower_loop, ll_opt)
optimizer.build_ul_solver(ul_opt, k=10, tolerance=1e-10)
NS
optimizer = bohml.optimizer.BOHMLOptimizer(
    "Feature", ll_method="Implicit", ul_method="NS",
    ll_objective=ll_o, ul_objective=ul_o,
    ll_model=ll_model, ul_model=ul_model)
optimizer.build_ll_solver(lower_loop, ll_opt)
optimizer.build_ul_solver(ul_opt, k=10, tolerance=1e-10)
GN
optimizer = bohml.optimizer.BOHMLOptimizer(
    "Feature", ll_method="Implicit", ul_method="GN",
    ll_objective=ll_o, ul_objective=ul_o,
    ll_model=ll_model, ul_model=ul_model)
optimizer.build_ll_solver(lower_loop, ll_opt)
optimizer.build_ul_solver(ul_opt)
BDA
optimizer = bohml.optimizer.BOHMLOptimizer(
    "Feature", ll_method="Dynamic", ul_method="Recurrence",
    ll_objective=ll_o, ul_objective=ul_o,
    ll_model=ll_model, ul_model=ul_model)
optimizer.build_ll_solver(lower_loop, ll_opt, alpha_init=0.4, alpha_decay=0.9)
optimizer.build_ul_solver(ul_opt)
BVFIM
optimizer = bohml.optimizer.BOHMLOptimizer(
    "Feature", ll_method="BVFIM", ul_method="BVFIM",
    ll_objective=ll_o, ul_objective=ul_o,
    ll_model=ll_model, ul_model=ul_model)
optimizer.build_ll_solver(lower_loop, ll_opt, y_loop=5, ll_l2_reg=0.1,
                          ul_l2_reg=0.01, ul_ln_reg=10., reg_decay=True)
optimizer.build_ul_solver(ul_opt, ll_l2_reg=0.1, ul_l2_reg=0.01, ul_ln_reg=10.)
IAPTT-GM
The IAPTT-GM method requires a UL objective function with a different signature, defined as:
def ul_o_iapttgm(data, target, ul_model, ll_model, time=-1):
    if time > 0:
        out_F = ll_model(ul_model(data), params=ll_model.parameters(time=time))
        loss_F = F.cross_entropy(out_F, target)
    else:
        out_F = ll_model(ul_model(data))
        loss_F = F.cross_entropy(out_F, target)
    return loss_F
Then set the method parameters as usual:
optimizer = bohml.optimizer.BOHMLOptimizer(
    "Feature", ll_method="Dynamic", ul_method="Recurrence",
    ll_objective=ll_o, ul_objective=ul_o_iapttgm,
    ll_model=ll_model, ul_model=ul_model)
optimizer.build_ll_solver(args.inner_loop, ll_opt, truncate_max_loss_iter=True)
optimizer.build_ul_solver(ul_opt, update_ll_model_init=True)
After setting up the problem solver, we can train our model with the optimizer:
# The data preparation part and evaluation function are omitted
val_losses = []
with tqdm(train_dataloader, total=60000, desc="Meta Training Phase") as pbar:
    for meta_iter, batch in enumerate(pbar):
        tr_xs, tr_ys = batch["train"][0].to(device), batch["train"][1].to(device)
        tst_xs, tst_ys = batch["test"][0].to(device), batch["test"][1].to(device)
        val_loss, forward_time, backward_time = optimizer.run_iter(
            tr_xs, tr_ys, tst_xs, tst_ys, meta_iter,
            forward_with_whole_batch=False)
        val_losses.append(val_loss)
        if meta_iter % eval_interval == 0:
            evaluate()
Initialization Optimization
As with BLO, we should also predefine our model and objective functions:
backbone_model = bohml.utils.model.backbone.Conv([5, 3, 84, 84], ways,
                                                 num_filters, use_head=True)

def i_o(data, target, model, params):
    return F.cross_entropy(model(data, params), target)

def o_o(data, target, model, params):
    return F.cross_entropy(model(data, params), target)
However, when using initialization optimization methods, we need to wrap our backbone model with bohml.utils.model.MetaModel; the method and its corresponding parameters are also configured through this wrapper:
MAML
meta_model = bohml.utils.model.MetaModel(backbone_model)
outer_opt = torch.optim.Adam(meta_model.parameters())
optimizer = bohml.optimizer.BOHMLOptimizer("Initial", inner_objective=i_o,
                                           outer_objective=o_o, meta_model=meta_model)
optimizer.build_meta_solver(outer_opt, inner_loop, inner_learning_rate=0.01,
                            use_second_order=True)
FOMAML
The procedure of the FOMAML method is identical to MAML except for:
optimizer.build_meta_solver(outer_opt, inner_loop, inner_learning_rate=0.01,
                            use_second_order=False)
Note that the parameter use_second_order works not only for MAML but also for the other initialization-based methods; setting it to False applies a first-order approximation during outer optimization.
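For example, the Warp-grad wrapper introduced below could be combined with a first-order outer update. The following is a minimal sketch that only reuses the calls documented in this section; whether dropping second-order terms is appropriate for a particular method depends on the algorithm and task.
# Hypothetical combination: Warp-grad wrapping with a first-order outer update
meta_model = bohml.utils.model.MetaModel(backbone_model, use_warp=True,
                                         num_warp_layers=1)
outer_opt = torch.optim.Adam(meta_model.parameters())
optimizer = bohml.optimizer.BOHMLOptimizer("Initial", inner_objective=i_o,
                                           outer_objective=o_o, meta_model=meta_model)
optimizer.build_meta_solver(outer_opt, inner_loop, inner_learning_rate=0.01,
                            use_second_order=False)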
MSGD
Choose the MSGD method when wrapping the backbone model:
meta_model = bohml.utils.model.MetaModel(backbone_model, learn_lr=True,
                                         meta_lr=0.1)
MT-net
Choose the MT-net method when wrapping the backbone model:
meta_model = bohml.utils.model.MetaModel(backbone_model, use_t=True)
Warp-grad
Choose the Warp-grad method when wrapping the backbone model:
meta_model = bohml.utils.model.MetaModel(backbone_model, use_warp=True,
                                         num_warp_layers=1)
L2F
Choose the L2F method when wrapping the backbone model:
meta_model = bohml.utils.model.MetaModel(backbone_model, use_forget=True)
Training and evaluation procedures are the same as above.