-
Notifications
You must be signed in to change notification settings - Fork 113
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add ck_tile for matrix_core swizzle kernel
- Loading branch information
1 parent
b2e95e2
commit 22ab193
Showing
20 changed files
with
2,699 additions
and
17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
# Ask generate.py for the list of kernel files it would produce, without
# actually emitting them at configure time. Each list is written to a text file
# in the current binary directory.
#
# The exit status is checked so that a generator failure stops the configure
# step right here with a clear message, instead of surfacing later as a missing
# or empty blob-list file.
execute_process(
  COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_LIST_DIR}/generate.py
  --api fwd,fwd_splitkv --list_blobs ${CMAKE_CURRENT_BINARY_DIR}/fwd_blob_list.txt
  RESULT_VARIABLE FMHA_FWD_LIST_BLOBS_RESULT
)
# RESULT_VARIABLE holds either the exit code or an error string if the process
# could not be started, so compare it as a string against "0".
if(NOT "${FMHA_FWD_LIST_BLOBS_RESULT}" STREQUAL "0")
  message(FATAL_ERROR
    "generate.py failed to list the fwd,fwd_splitkv kernels: ${FMHA_FWD_LIST_BLOBS_RESULT}")
endif()

execute_process(
  COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_LIST_DIR}/generate.py
  --api bwd --list_blobs ${CMAKE_CURRENT_BINARY_DIR}/bwd_blob_list.txt
  RESULT_VARIABLE FMHA_BWD_LIST_BLOBS_RESULT
)
if(NOT "${FMHA_BWD_LIST_BLOBS_RESULT}" STREQUAL "0")
  message(FATAL_ERROR
    "generate.py failed to list the bwd kernels: ${FMHA_BWD_LIST_BLOBS_RESULT}")
endif()
|
||
# Load the generated-file lists, one entry per line of each text file.
# NOTE: the files named in FMHA_FWD_GEN_BLOBS / FMHA_BWD_GEN_BLOBS have to be in
# the same directory as the current CMake list; otherwise CMake will not figure
# out the dependency on them properly.
file(STRINGS "${CMAKE_CURRENT_BINARY_DIR}/fwd_blob_list.txt" FMHA_FWD_GEN_BLOBS)
file(STRINGS "${CMAKE_CURRENT_BINARY_DIR}/bwd_blob_list.txt" FMHA_BWD_GEN_BLOBS)
|
||
# Build-time rules that run generate.py to emit the kernel sources listed in
# FMHA_FWD_GEN_BLOBS / FMHA_BWD_GEN_BLOBS into the current binary directory.
#
# DEPENDS on the generator script makes the rules re-run when generate.py is
# edited; without it the previously generated files would be reused.
# NOTE(review): any other files the generator reads are not tracked here - add
# them to DEPENDS if there are some.
# VERBATIM keeps argument escaping identical across platforms and generators.
add_custom_command(
  OUTPUT ${FMHA_FWD_GEN_BLOBS}
  COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_LIST_DIR}/generate.py
  --api fwd,fwd_splitkv --output_dir ${CMAKE_CURRENT_BINARY_DIR}
  DEPENDS ${CMAKE_CURRENT_LIST_DIR}/generate.py
  VERBATIM
)

add_custom_command(
  OUTPUT ${FMHA_BWD_GEN_BLOBS}
  COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_LIST_DIR}/generate.py
  --api bwd --output_dir ${CMAKE_CURRENT_BINARY_DIR}
  DEPENDS ${CMAKE_CURRENT_LIST_DIR}/generate.py
  VERBATIM
)
|
||
# Forward FMHA example: the hand-written driver plus the generated kernel files.
# add_example_executable() is deliberately not used, because this target should
# not be part of "make all/install/check"; hence EXCLUDE_FROM_ALL.
set(EXAMPLE_FMHA_FWD "tile_example_fmha_fwd")
message("adding example ${EXAMPLE_FMHA_FWD}")
add_executable(${EXAMPLE_FMHA_FWD} EXCLUDE_FROM_ALL
  fmha_fwd.cpp
  ${FMHA_FWD_GEN_BLOBS}
)
target_include_directories(${EXAMPLE_FMHA_FWD} PRIVATE ${CMAKE_CURRENT_LIST_DIR})
|
||
# Backward FMHA example: the hand-written driver plus the generated kernel files.
# add_example_executable() is deliberately not used, because this target should
# not be part of "make all/install/check"; hence EXCLUDE_FROM_ALL.
set(EXAMPLE_FMHA_BWD "tile_example_fmha_bwd")
message("adding example ${EXAMPLE_FMHA_BWD}")
add_executable(${EXAMPLE_FMHA_BWD} EXCLUDE_FROM_ALL
  fmha_bwd.cpp
  ${FMHA_BWD_GEN_BLOBS}
)
target_include_directories(${EXAMPLE_FMHA_BWD} PRIVATE ${CMAKE_CURRENT_LIST_DIR})
|
||
# Fast exp2 is on unless the caller has already defined FMHA_FWD_FAST_EXP2.
# NOTE: this is dangerous, because it adds -fgpu-flush-denormals-to-zero, which
# makes the whole kernel flush denormals. It is a stop-gap until an exp2
# intrinsic is available (work in progress with the compiler team); remove it
# afterwards.
if(NOT DEFINED FMHA_FWD_FAST_EXP2)
  set(FMHA_FWD_FAST_EXP2 true)
endif()

# Both examples start from the same base flag. undefined-func-template is turned
# off so the sources compile without explicitly declaring the function
# specializations, which are auto-generated.
set(EXAMPLE_FMHA_FWD_COMPILE_OPTIONS -Wno-undefined-func-template)
set(EXAMPLE_FMHA_BWD_COMPILE_OPTIONS -Wno-undefined-func-template)

# Tell the sources whether fast exp2 is enabled; the denormal-flushing flag is
# only added in the enabled case.
if(FMHA_FWD_FAST_EXP2)
  list(APPEND EXAMPLE_FMHA_FWD_COMPILE_OPTIONS
    -DCK_TILE_FMHA_FWD_FAST_EXP2=1 -fgpu-flush-denormals-to-zero)
  list(APPEND EXAMPLE_FMHA_BWD_COMPILE_OPTIONS
    -DCK_TILE_FMHA_FWD_FAST_EXP2=1 -fgpu-flush-denormals-to-zero)
else()
  list(APPEND EXAMPLE_FMHA_FWD_COMPILE_OPTIONS -DCK_TILE_FMHA_FWD_FAST_EXP2=0)
  list(APPEND EXAMPLE_FMHA_BWD_COMPILE_OPTIONS -DCK_TILE_FMHA_FWD_FAST_EXP2=0)
endif()

# Floating-point values are compared directly in order to check sentinel
# values, so the float-equal warning is silenced.
list(APPEND EXAMPLE_FMHA_FWD_COMPILE_OPTIONS -Wno-float-equal)
list(APPEND EXAMPLE_FMHA_BWD_COMPILE_OPTIONS -Wno-float-equal)

target_compile_options(${EXAMPLE_FMHA_FWD} PRIVATE ${EXAMPLE_FMHA_FWD_COMPILE_OPTIONS})
target_compile_options(${EXAMPLE_FMHA_BWD} PRIVATE ${EXAMPLE_FMHA_BWD_COMPILE_OPTIONS})
|
||
# TODO: this global property has to be switched off; otherwise the progress
# message CMake generates lists too many files and the build fails with
# "execvp: /bin/sh: Argument list too long".
# Caveat: the property is GLOBAL, so this may affect the rest of the build.
# TODO: consider generating a makefile for the codegen step ourselves.
set_property(GLOBAL PROPERTY RULE_MESSAGES OFF)
Oops, something went wrong.