| Name | Last modified | Size | Description | |
|---|---|---|---|---|
| Parent Directory | - | |||
| alias_passthrough_pa..> | 2025-02-02 11:49 | 1.7K | ||
| all_reduce_blueconne..> | 2025-02-02 11:49 | 1.8K | ||
| analytical_latency_e..> | 2025-02-02 11:49 | 2.2K | ||
| autotuner_compile_ut..> | 2025-02-02 11:49 | 4.1K | ||
| autotuner_util.h | 2025-02-02 11:49 | 10K | ||
| backend_configs.pb.h | 2025-02-02 11:49 | 143K | ||
| buffer_allocations.h | 2025-02-02 11:49 | 3.6K | ||
| buffer_comparator.h | 2025-02-02 11:49 | 1.8K | ||
| buffer_sharing.h | 2025-02-02 11:49 | 1.4K | ||
| compile_module_to_ll..> | 2025-02-02 11:49 | 3.2K | ||
| conditional_thunk.h | 2025-02-02 11:49 | 2.6K | ||
| conv_algorithm_picker.h | 2025-02-02 11:49 | 6.3K | ||
| conv_layout_normaliz..> | 2025-02-02 11:49 | 1.2K | ||
| convolution_thunk.h | 2025-02-02 11:49 | 3.4K | ||
| copy_fusion.h | 2025-02-02 11:49 | 1.5K | ||
| copy_thunk.h | 2025-02-02 11:49 | 2.6K | ||
| cublas_cudnn.h | 2025-02-02 11:49 | 9.0K | ||
| cublas_lt_matmul_thu..> | 2025-02-02 11:49 | 3.2K | ||
| cublas_pad_for_gemms.h | 2025-02-02 11:49 | 1.9K | ||
| cublas_padding_requi..> | 2025-02-02 11:49 | 1.6K | ||
| cudnn_fused_conv_rew..> | 2025-02-02 11:49 | 4.2K | ||
| cudnn_fused_mha_rewr..> | 2025-02-02 11:49 | 1.9K | ||
| cudnn_fused_mha_tran..> | 2025-02-02 11:49 | 1.4K | ||
| cudnn_pad_for_convol..> | 2025-02-02 11:49 | 1.9K | ||
| cudnn_simplify_paddi..> | 2025-02-02 11:49 | 2.6K | ||
| cudnn_support_utils.h | 2025-02-02 11:49 | 3.0K | ||
| cudnn_vectorize_conv..> | 2025-02-02 11:49 | 2.3K | ||
| cusolver_context.h | 2025-02-02 11:49 | 4.2K | ||
| cusolver_rewriter.h | 2025-02-02 11:49 | 1.6K | ||
| custom_call_thunk.h | 2025-02-02 11:49 | 2.6K | ||
| dot_dimension_sorter.h | 2025-02-02 11:49 | 1.8K | ||
| elemental_ir_emitter.h | 2025-02-02 11:49 | 5.5K | ||
| executable.pb.h | 2025-02-02 11:49 | 40K | ||
| fft_thunk.h | 2025-02-02 11:49 | 3.3K | ||
| for_thunk.h | 2025-02-02 11:49 | 1.7K | ||
| fused_mha_thunk.h | 2025-02-02 11:49 | 5.1K | ||
| fusion_merger.h | 2025-02-02 11:49 | 3.3K | ||
| fusion_pipeline.h | 2025-02-02 11:49 | 1.4K | ||
| fusion_wrapper.h | 2025-02-02 11:49 | 1.5K | ||
| fusions/ | 2025-02-02 11:49 | - | ||
| gemm_algorithm_picker.h | 2025-02-02 11:49 | 2.9K | ||
| gemm_broadcast_foldi..> | 2025-02-02 11:49 | 1.8K | ||
| gemm_rewriter.h | 2025-02-02 11:49 | 2.2K | ||
| gemm_rewriter_triton.h | 2025-02-02 11:49 | 6.5K | ||
| gemm_thunk.h | 2025-02-02 11:49 | 2.0K | ||
| gpu_all_gather_optim..> | 2025-02-02 11:49 | 1.4K | ||
| gpu_asm_opts_util.h | 2025-02-02 11:49 | 1.1K | ||
| gpu_async_collective..> | 2025-02-02 11:49 | 1.5K | ||
| gpu_autotuning.pb.h | 2025-02-02 11:49 | 56K | ||
| gpu_compiler.h | 2025-02-02 11:49 | 10K | ||
| gpu_constants.h | 2025-02-02 11:49 | 1.9K | ||
| gpu_conv_padding_leg..> | 2025-02-02 11:49 | 1.8K | ||
| gpu_conv_rewriter.h | 2025-02-02 11:49 | 1.9K | ||
| gpu_conv_runner.h | 2025-02-02 11:49 | 9.6K | ||
| gpu_convert_async_co..> | 2025-02-02 11:49 | 1.4K | ||
| gpu_cost_model_stats..> | 2025-02-02 11:49 | 2.0K | ||
| gpu_executable.h | 2025-02-02 11:49 | 14K | ||
| gpu_executable_run_o..> | 2025-02-02 11:49 | 4.7K | ||
| gpu_float_support.h | 2025-02-02 11:49 | 1.7K | ||
| gpu_fused_mha_runner.h | 2025-02-02 11:49 | 15K | ||
| gpu_fusible.h | 2025-02-02 11:49 | 8.7K | ||
| gpu_hlo_cost_analysis.h | 2025-02-02 11:49 | 4.5K | ||
| gpu_hlo_schedule.h | 2025-02-02 11:49 | 1.6K | ||
| gpu_layout_assignment.h | 2025-02-02 11:49 | 2.6K | ||
| gpu_performance_model.h | 2025-02-02 11:49 | 5.5K | ||
| gpu_reduce_scatter_c..> | 2025-02-02 11:49 | 1.3K | ||
| gpu_sanitize_constan..> | 2025-02-02 11:49 | 1.4K | ||
| gpu_scatter_expander.h | 2025-02-02 11:49 | 1.3K | ||
| gpu_target_config.h | 2025-02-02 11:49 | 1.4K | ||
| gpu_transfer_manager.h | 2025-02-02 11:49 | 4.7K | ||
| hlo_algorithm_denyli..> | 2025-02-02 11:49 | 1.2K | ||
| hlo_fusion_analysis.h | 2025-02-02 11:49 | 5.3K | ||
| hlo_fusion_stats.h | 2025-02-02 11:49 | 1.8K | ||
| hlo_op_profile.pb.h | 2025-02-02 11:49 | 32K | ||
| hlo_op_profiles.h | 2025-02-02 11:49 | 66K | ||
| hlo_to_ir_bindings.h | 2025-02-02 11:49 | 4.5K | ||
| hlo_traversal.h | 2025-02-02 11:49 | 3.3K | ||
| horizontal_input_fus..> | 2025-02-02 11:49 | 2.3K | ||
| horizontal_loop_fusi..> | 2025-02-02 11:49 | 5.6K | ||
| infeed_manager.h | 2025-02-02 11:49 | 2.4K | ||
| infeed_thunk.h | 2025-02-02 11:49 | 1.7K | ||
| instruction_fusion.h | 2025-02-02 11:49 | 3.1K | ||
| ir_emission_utils.h | 2025-02-02 11:49 | 9.0K | ||
| ir_emitter.h | 2025-02-02 11:49 | 6.7K | ||
| ir_emitter_context.h | 2025-02-02 11:49 | 4.3K | ||
| ir_emitter_nested.h | 2025-02-02 11:49 | 3.4K | ||
| ir_emitter_triton.h | 2025-02-02 11:49 | 4.0K | ||
| ir_emitter_unnested.h | 2025-02-02 11:49 | 17K | ||
| kernel_arguments.h | 2025-02-02 11:49 | 2.9K | ||
| kernel_mapping_scheme.h | 2025-02-02 11:49 | 8.9K | ||
| kernel_reuse_cache.h | 2025-02-02 11:49 | 2.6K | ||
| kernel_thunk.h | 2025-02-02 11:49 | 3.9K | ||
| launch_dimensions.h | 2025-02-02 11:49 | 4.7K | ||
| llvm_gpu_backend/ | 2025-02-02 11:49 | - | ||
| loop_double_buffer_t..> | 2025-02-02 11:49 | 2.0K | ||
| matmul_utils.h | 2025-02-02 11:49 | 11K | ||
| memset_thunk.h | 2025-02-02 11:49 | 2.8K | ||
| metrics.h | 2025-02-02 11:49 | 1.7K | ||
| move_copy_to_users.h | 2025-02-02 11:49 | 1.3K | ||
| multi_output_fusion.h | 2025-02-02 11:49 | 5.7K | ||
| nccl_all_gather_thunk.h | 2025-02-02 11:49 | 2.3K | ||
| nccl_all_reduce_thunk.h | 2025-02-02 11:49 | 4.4K | ||
| nccl_all_to_all_thunk.h | 2025-02-02 11:49 | 2.4K | ||
| nccl_collective_perm..> | 2025-02-02 11:49 | 2.7K | ||
| nccl_collective_thunk.h | 2025-02-02 11:49 | 7.3K | ||
| nccl_p2p_thunk_common.h | 2025-02-02 11:49 | 5.0K | ||
| nccl_recv_thunk.h | 2025-02-02 11:49 | 2.3K | ||
| nccl_send_thunk.h | 2025-02-02 11:49 | 2.3K | ||
| nccl_utils.h | 2025-02-02 11:49 | 4.2K | ||
| non_atomically_upgra..> | 2025-02-02 11:49 | 3.0K | ||
| nvptx_compiler.h | 2025-02-02 11:49 | 5.7K | ||
| outfeed_manager.h | 2025-02-02 11:49 | 2.5K | ||
| outfeed_thunk.h | 2025-02-02 11:49 | 1.7K | ||
| parallel_loop_emitter.h | 2025-02-02 11:49 | 3.6K | ||
| precompiled_kernels.h | 2025-02-02 11:49 | 2.4K | ||
| prepare_hlo_for_ir_e..> | 2025-02-02 11:49 | 1.4K | ||
| priority_fusion.h | 2025-02-02 11:49 | 2.7K | ||
| reduction_degenerate..> | 2025-02-02 11:49 | 1.8K | ||
| reduction_dimension_..> | 2025-02-02 11:49 | 1.7K | ||
| reduction_layout_nor..> | 2025-02-02 11:49 | 1.8K | ||
| reduction_splitter.h | 2025-02-02 11:49 | 2.0K | ||
| reduction_utils.h | 2025-02-02 11:49 | 2.7K | ||
| replica_id_thunk.h | 2025-02-02 11:49 | 1.8K | ||
| runtime/ | 2025-02-02 11:49 | - | ||
| runtime2/ | 2025-02-02 11:49 | - | ||
| runtime3/ | 2025-02-02 11:49 | - | ||
| runtime_intrinsics.h | 2025-02-02 11:49 | 1.0K | ||
| scatter_slice_simpli..> | 2025-02-02 11:49 | 2.2K | ||
| sequential_thunk.h | 2025-02-02 11:49 | 1.9K | ||
| softmax_rewriter_tri..> | 2025-02-02 11:49 | 2.5K | ||
| split_k_gemm_rewriter.h | 2025-02-02 11:49 | 1.6K | ||
| stream_executor_util.h | 2025-02-02 11:49 | 5.2K | ||
| target_constants.h | 2025-02-02 11:49 | 1.9K | ||
| target_util.h | 2025-02-02 11:49 | 3.0K | ||
| thunk.h | 2025-02-02 11:49 | 6.6K | ||
| topk_specializer.h | 2025-02-02 11:49 | 1.5K | ||
| topk_splitter.h | 2025-02-02 11:49 | 1.8K | ||
| tree_reduction_rewri..> | 2025-02-02 11:49 | 3.2K | ||
| triangular_solve_rew..> | 2025-02-02 11:49 | 2.2K | ||
| triangular_solve_thu..> | 2025-02-02 11:49 | 3.3K | ||
| triton_autotuner.h | 2025-02-02 11:49 | 2.3K | ||
| variadic_op_splitter.h | 2025-02-02 11:49 | 1.4K | ||
| while_thunk.h | 2025-02-02 11:49 | 2.5K | ||
| xfeed_queue.h | 2025-02-02 11:49 | 5.0K | ||
| xla_executor_state.h | 2025-02-02 11:49 | 2.0K | ||