[fairseq] translation task & model

2019-04-29  VanJordan

translation.py

# infer the language pair from a preprocessed filename such as
# "train.de-en.de", where parts[1] is the "src-tgt" pair
parts = filename.split('.')
if len(parts) >= 3 and len(parts[1].split('-')) == 2:
    return parts[1].split('-')
# load the source and target vocabularies written by preprocess.py
src_dict = cls.load_dictionary(os.path.join(args.data[0], 'dict.{}.txt'.format(args.source_lang)))
tgt_dict = cls.load_dictionary(os.path.join(args.data[0], 'dict.{}.txt'.format(args.target_lang)))
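
Each dict.{lang}.txt is plain text with one "symbol count" pair per line. A minimal sketch of inspecting one directly (the path here is hypothetical):

from fairseq.data import Dictionary

d = Dictionary.load('data-bin/iwslt14/dict.de.txt')  # hypothetical path
print(len(d), d.pad(), d.eos(), d.unk())  # vocabulary size and special-symbol indices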

fairseq_task.py

@classmethod
def build_dictionary(cls, filenames, workers=1, threshold=-1, nwords=-1, padding_factor=8):
    """Build the dictionary

    Args:
        filenames (list): list of filenames
        workers (int): number of concurrent workers
        threshold (int): defines the minimum word count
        nwords (int): defines the total number of words in the final dictionary,
            including special symbols
        padding_factor (int): can be used to pad the dictionary size to be a
            multiple of 8, which is important on some hardware (e.g., Nvidia
            Tensor Cores).
    """
    d = Dictionary()
    for filename in filenames:
        Dictionary.add_file_to_dictionary(filename, d, tokenizer.tokenize_line, workers)
    d.finalize(threshold=threshold, nwords=nwords, padding_factor=padding_factor)
    return d
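
For reference, a rough sketch of calling this classmethod during preprocessing; the file names and thresholds here are made up:

d = TranslationTask.build_dictionary(
    ['train.de', 'train.en'],  # hypothetical raw training files
    workers=4,
    threshold=5,        # drop symbols seen fewer than 5 times
    padding_factor=8,   # round the vocabulary size up to a multiple of 8
)
d.save('dict.joint.txt')
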
def train_step(self, sample, model, criterion, optimizer, ignore_grad=False):
    """
    Do forward and backward, and return the loss as computed by *criterion*
    for the given *model* and *sample*.

    Args:
        sample (dict): the mini-batch. The format is defined by the
            :class:`~fairseq.data.FairseqDataset`.
        model (~fairseq.models.BaseFairseqModel): the model
        criterion (~fairseq.criterions.FairseqCriterion): the criterion
        optimizer (~fairseq.optim.FairseqOptimizer): the optimizer
        ignore_grad (bool): multiply loss by 0 if this is set to True

    Returns:
        tuple:
            - the loss
            - the sample size, which is used as the denominator for the
                gradient
            - logging outputs to display while training
    """
    model.train()
    loss, sample_size, logging_output = criterion(model, sample)
    if ignore_grad:
        loss *= 0
    optimizer.backward(loss)
    return loss, sample_size, logging_output
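
A simplified sketch of how a trainer object drives this method; this is the shape of the loop, not fairseq's actual Trainer code:

for sample in epoch_itr:
    optimizer.zero_grad()
    loss, sample_size, logging_output = task.train_step(
        sample, model, criterion, optimizer,
        ignore_grad=False,  # True is used for dummy batches: backward still runs, but the loss is zeroed
    )
    optimizer.step()
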
def inference_step(self, generator, models, sample, prefix_tokens=None):
    with torch.no_grad():
        return generator.generate(models, sample, prefix_tokens=prefix_tokens)
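
inference_step simply wraps generator.generate in torch.no_grad(). A usage sketch; the SequenceGenerator constructor arguments shown are assumptions and vary across fairseq versions:

from fairseq.sequence_generator import SequenceGenerator

generator = SequenceGenerator(task.target_dictionary, beam_size=5)  # assumed, version-dependent signature
hypos = task.inference_step(generator, [model], sample)  # one list of hypotheses per input sentence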

sequence_generator.py

sampling (bool, optional): sample outputs instead of beam search
    (default: False)
sampling_topk (int, optional): only sample among the top-k choices
    at each step (default: -1)
sampling_temperature (float, optional): temperature for sampling,
    where values >1.0 produce more uniform sampling and values
    <1.0 produce sharper sampling (default: 1.0)
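
For intuition, a generic sketch of temperature plus top-k sampling over one step's log-probabilities; this is illustrative, not fairseq's exact implementation:

import torch

def sample_next_token(lprobs, temperature=1.0, topk=-1):
    # dividing log-probs by the temperature: >1.0 flattens, <1.0 sharpens
    lprobs = lprobs / temperature
    if topk > 0:
        # restrict sampling to the k highest-scoring tokens
        values, indices = lprobs.topk(topk, dim=-1)
        choice = torch.multinomial(values.softmax(dim=-1), 1)
        return indices.gather(-1, choice)
    return torch.multinomial(lprobs.softmax(dim=-1), 1)
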
# the encoder only needs the source-side inputs, so drop the decoder's
# prev_output_tokens from net_input before calling the encoder
encoder_input = {
    k: v for k, v in sample['net_input'].items()
    if k != 'prev_output_tokens'
}
encoder_outs = model.forward_encoder(encoder_input)
reorder_state = None
batch_idxs = None
for step in range(max_len + 1):  # one extra step for EOS marker
    # reorder decoder internal states based on the prev choice of beams
    if reorder_state is not None:
        if batch_idxs is not None:
            # update beam indices to take into account removed sentences
            corr = batch_idxs - torch.arange(batch_idxs.numel()).type_as(batch_idxs)
            reorder_state.view(-1, beam_size).add_(corr.unsqueeze(-1) * beam_size)
        model.reorder_incremental_state(reorder_state)
        model.reorder_encoder_out(encoder_outs, reorder_state)

    lprobs, avg_attn_scores = model.forward_decoder(tokens[:, :step + 1], encoder_outs)

    lprobs[:, self.pad] = -math.inf  # never select pad
    lprobs[:, self.unk] -= self.unk_penalty  # apply unk penalty
# bsz is the batch size
# list of completed sentences
finalized = [[] for i in range(bsz)]
finished = [False for i in range(bsz)]
# weakest finished hypothesis per sentence; replaced once a better candidate completes
worst_finalized = [{'idx': None, 'score': -math.inf} for i in range(bsz)]
...
# the return value of generate() in sequence_generator.py
# sort by score descending
for sent in range(len(finalized)):
    finalized[sent] = sorted(finalized[sent], key=lambda r: r['score'], reverse=True)

return finalized
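
Each finalized[sent] is a list of hypothesis dicts with fields such as 'tokens' and 'score'. A short sketch of consuming the top hypothesis per sentence (tgt_dict is the target Dictionary):

for sent, hypos in enumerate(finalized):
    best = hypos[0]  # already sorted by score, descending
    print(sent, best['score'], tgt_dict.string(best['tokens']))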

search.step
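
This is where the search strategy picks next-step candidates from the scores. A simplified sketch of what BeamSearch.step computes in fairseq/search.py (buffer management omitted, so treat the details as approximate):

import torch

def beam_search_step(step, lprobs, scores, beam_size):
    # lprobs: (bsz, beam_size, vocab) log-probs for the current step
    # scores: (bsz, beam_size, max_len) cumulative scores of each beam so far
    bsz, _, vocab_size = lprobs.size()
    if step == 0:
        # all beams are identical at the first step, so keep only one copy
        lprobs = lprobs[:, ::beam_size, :].contiguous()
    else:
        # add each beam's running total so candidates carry cumulative scores
        lprobs = lprobs + scores[:, :, step - 1].unsqueeze(-1)
    # take 2 * beam_size candidates so hypotheses that hit EOS can be set aside
    cand_scores, cand_indices = torch.topk(
        lprobs.view(bsz, -1),
        k=min(beam_size * 2, lprobs.view(bsz, -1).size(1)),
    )
    cand_beams = cand_indices // vocab_size  # which beam each candidate extends
    cand_tokens = cand_indices % vocab_size  # which vocabulary item it appends
    return cand_scores, cand_tokens, cand_beams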

First, let's clarify how trainer.py, the task, the model, and train.py fit together; the sketch below outlines the flow.
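
In outline, simplified from train.py (the exact Trainer constructor arguments vary across fairseq versions):

from fairseq import tasks
from fairseq.trainer import Trainer

# args comes from fairseq's command-line parser
task = tasks.setup_task(args)            # e.g. TranslationTask: loads the dictionaries
task.load_dataset('train')               # builds the train-split FairseqDataset
model = task.build_model(args)           # e.g. TransformerModel
criterion = task.build_criterion(args)   # e.g. label-smoothed cross entropy
trainer = Trainer(args, task, model, criterion)  # owns the optimizer and calls task.train_step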

task

train.py

# a value of 0 (the default) falls through to math.inf, i.e. no limit
max_epoch = args.max_epoch or math.inf
max_update = args.max_update or math.inf
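
These two bounds drive the outer loop of train.py, roughly (train and validate are that script's own helpers):

while epoch_itr.epoch < max_epoch and trainer.get_num_updates() < max_update:
    train(args, trainer, task, epoch_itr)                    # one full epoch
    valid_losses = validate(args, trainer, task, epoch_itr, valid_subsets)
    lr = trainer.lr_step(epoch_itr.epoch, valid_losses[0])   # anneal the learning rate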

trainer.py

model

fairseq_model.py

BaseFairseqModel

FairseqMultiModel

TransformerModel

TransformerEncoder
TransformerEncoderLayer
# MultiheadAttention stores one fused Q/K/V projection instead of three
# separate matrices (see the sketch after this list):
self.in_proj_weight = Parameter(torch.Tensor(3 * embed_dim, embed_dim))
TransformerDecoderLayer
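
About in_proj_weight above: in self-attention, where query, key and value are the same tensor, all three projections come from a single matmul against the fused weight. A minimal sketch:

import torch
import torch.nn.functional as F

def fused_qkv(x, in_proj_weight, in_proj_bias):
    # x: (seq_len, batch, embed_dim); one matmul yields (..., 3 * embed_dim),
    # which chunk() splits into the query, key and value projections
    q, k, v = F.linear(x, in_proj_weight, in_proj_bias).chunk(3, dim=-1)
    return q, k, v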