| Name | Last modified | Size | Description | |
|---|---|---|---|---|
| Parent Directory | - | |||
| util.py | 2025-02-02 11:49 | 2.7K | ||
| transformer_xl_test.py | 2025-02-02 11:49 | 9.2K | ||
| transformer_xl.py | 2025-02-02 11:49 | 22K | ||
| transformer_test.py | 2025-02-02 11:49 | 5.4K | ||
| transformer_scaffold..> | 2025-02-02 11:49 | 19K | ||
| transformer_scaffold.py | 2025-02-02 11:49 | 15K | ||
| transformer_encoder_..> | 2025-02-02 11:49 | 27K | ||
| transformer_encoder_..> | 2025-02-02 11:49 | 18K | ||
| transformer.py | 2025-02-02 11:49 | 20K | ||
| tn_transformer_test.py | 2025-02-02 11:49 | 8.7K | ||
| tn_transformer_expan..> | 2025-02-02 11:49 | 11K | ||
| tn_expand_condense_t..> | 2025-02-02 11:49 | 5.8K | ||
| tn_expand_condense.py | 2025-02-02 11:49 | 6.5K | ||
| text_layers_test.py | 2025-02-02 11:49 | 24K | ||
| text_layers.py | 2025-02-02 11:49 | 32K | ||
| talking_heads_attent..> | 2025-02-02 11:49 | 6.8K | ||
| talking_heads_attent..> | 2025-02-02 11:49 | 6.7K | ||
| spectral_normalizati..> | 2025-02-02 11:49 | 3.0K | ||
| spectral_normalizati..> | 2025-02-02 11:49 | 10K | ||
| self_attention_mask.py | 2025-02-02 11:49 | 2.1K | ||
| routing_test.py | 2025-02-02 11:49 | 2.2K | ||
| routing.py | 2025-02-02 11:49 | 4.4K | ||
| rezero_transformer_t..> | 2025-02-02 11:49 | 5.6K | ||
| rezero_transformer.py | 2025-02-02 11:49 | 12K | ||
| reuse_transformer_te..> | 2025-02-02 11:49 | 17K | ||
| reuse_transformer.py | 2025-02-02 11:49 | 15K | ||
| reuse_attention_test.py | 2025-02-02 11:49 | 14K | ||
| reuse_attention.py | 2025-02-02 11:49 | 25K | ||
| relative_attention_t..> | 2025-02-02 11:49 | 6.5K | ||
| relative_attention.py | 2025-02-02 11:49 | 20K | ||
| position_embedding_t..> | 2025-02-02 11:49 | 7.8K | ||
| position_embedding.py | 2025-02-02 11:49 | 11K | ||
| per_dim_scale_attent..> | 2025-02-02 11:49 | 1.7K | ||
| per_dim_scale_attent..> | 2025-02-02 11:49 | 3.3K | ||
| pack_optimization_te..> | 2025-02-02 11:49 | 2.7K | ||
| pack_optimization.py | 2025-02-02 11:49 | 10K | ||
| on_device_embedding_..> | 2025-02-02 11:49 | 8.4K | ||
| on_device_embedding.py | 2025-02-02 11:49 | 4.5K | ||
| multi_channel_attent..> | 2025-02-02 11:49 | 1.9K | ||
| multi_channel_attent..> | 2025-02-02 11:49 | 7.1K | ||
| moe_test.py | 2025-02-02 11:49 | 9.2K | ||
| moe.py | 2025-02-02 11:49 | 27K | ||
| mobile_bert_layers_t..> | 2025-02-02 11:49 | 11K | ||
| mobile_bert_layers.py | 2025-02-02 11:49 | 23K | ||
| mixing_test.py | 2025-02-02 11:49 | 3.5K | ||
| mixing.py | 2025-02-02 11:49 | 9.5K | ||
| mat_mul_with_margin_..> | 2025-02-02 11:49 | 2.0K | ||
| mat_mul_with_margin.py | 2025-02-02 11:49 | 2.2K | ||
| masked_softmax_test.py | 2025-02-02 11:49 | 4.3K | ||
| masked_softmax.py | 2025-02-02 11:49 | 2.9K | ||
| masked_lm_test.py | 2025-02-02 11:49 | 5.8K | ||
| masked_lm.py | 2025-02-02 11:49 | 4.8K | ||
| kernel_attention_tes..> | 2025-02-02 11:49 | 9.3K | ||
| kernel_attention.py | 2025-02-02 11:49 | 34K | ||
| gaussian_process_tes..> | 2025-02-02 11:49 | 9.9K | ||
| gaussian_process.py | 2025-02-02 11:49 | 20K | ||
| gated_feedforward_te..> | 2025-02-02 11:49 | 4.4K | ||
| gated_feedforward.py | 2025-02-02 11:49 | 9.5K | ||
| factorized_embedding..> | 2025-02-02 11:49 | 2.5K | ||
| factorized_embedding.py | 2025-02-02 11:49 | 2.8K | ||
| cls_head_test.py | 2025-02-02 11:49 | 8.7K | ||
| cls_head.py | 2025-02-02 11:49 | 16K | ||
| block_diag_feedforwa..> | 2025-02-02 11:49 | 4.1K | ||
| block_diag_feedforwa..> | 2025-02-02 11:49 | 7.1K | ||
| bigbird_attention_te..> | 2025-02-02 11:49 | 2.1K | ||
| bigbird_attention.py | 2025-02-02 11:49 | 21K | ||
| attention_test.py | 2025-02-02 11:49 | 3.4K | ||
| attention.py | 2025-02-02 11:49 | 3.8K | ||
| __pycache__/ | 2025-02-02 11:49 | - | ||
| __init__.py | 2025-02-02 11:49 | 4.7K | ||