Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optional offloading to AMD GPUs #626

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
5 changes: 5 additions & 0 deletions build/bli_config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -183,5 +183,10 @@
#define BLIS_DISABLE_COMPLEX_RETURN_INTEL
#endif

#if @enable_amd_offload@
#define BLIS_ENABLE_AMD_OFFLOAD
#else
#define BLIS_DISABLE_AMD_OFFLOAD
#endif

#endif
3 changes: 3 additions & 0 deletions build/config.mk.in
Original file line number Diff line number Diff line change
Expand Up @@ -208,5 +208,8 @@ LIBPTHREAD := @libpthread@
# Whether we should use AMD-customized versions of certain framework files.
ENABLE_AMD_FRAME_TWEAKS := @enable_amd_frame_tweaks@

# Whether offloading to AMD accelerators should be attempted
ENABLE_AMD_OFFLOAD := @enable_amd_offload@

# end of ifndef CONFIG_MK_INCLUDED conditional block
endif
10 changes: 10 additions & 0 deletions common.mk
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,11 @@ ifeq ($(DEBUG_TYPE),sde)
LDFLAGS := $(filter-out $(LIBMEMKIND),$(LDFLAGS))
endif

# If AMD offloading is enabled, link against libdl as well as the HIP runtime
# (amdhip64) and rocBLAS, adding /opt/rocm/lib to the library search path.
# NOTE(review): the /opt/rocm prefix is hard-coded; confirm whether installs
# in a non-default location need to be supported.
ifeq ($(ENABLE_AMD_OFFLOAD),yes)
LDFLAGS += -ldl
LDFLAGS += -L/opt/rocm/lib -lamdhip64 -lrocblas
endif

# Specify the shared library's 'soname' field.
# NOTE: The flag for creating shared objects is different for Linux and OS X.
ifeq ($(OS_NAME),Darwin)
Expand Down Expand Up @@ -1134,6 +1139,11 @@ ifeq ($(MK_ENABLE_CBLAS),yes)
CINCFLAGS += -I$(CBLAS_H_DIRPATH)
endif

# If AMD offloading is enabled, we also add the ROCm include directory and
# define __HIP_PLATFORM_AMD__ on the compiler command line (presumably so the
# HIP headers select their AMD backend -- confirm against the HIP docs).
# NOTE(review): the /opt/rocm prefix is hard-coded; confirm whether installs
# in a non-default location need to be supported.
ifeq ($(ENABLE_AMD_OFFLOAD),yes)
CINCFLAGS += -I/opt/rocm/include -D__HIP_PLATFORM_AMD__=1
endif

# Obtain a list of header paths in the configured addons. Then add -I to each
# header path.
CADDONINCFLAGS := $(strip $(patsubst %, -I%, $(ADDON_HDR_DIRPATHS)))
Expand Down
41 changes: 41 additions & 0 deletions configure
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,15 @@ print_usage()
echo " which are determined by the BLIS subconfiguration used at"
echo " runtime.) By default, these customized files are disabled."
echo " "
echo " --enable-amd-offload, --disable-amd-offload"
echo " "
echo " Enable conditional offloading of some Level-3 BLAS calls"
echo " to AMD accelerators such as MI100, MI200."
echo " Enabling this option requires ROCm to be installed and"
echo " uses rocBLAS as a backend."
echo " Introduces rocblas-dev and hip-dev as dependencies."
echo " By default, the offloading path are disabled."
echo " "
echo " -a NAME --enable-addon=NAME"
echo " "
echo " Enable the code provided by an addon. An addon consists"
Expand Down Expand Up @@ -2469,6 +2478,7 @@ main()
enable_mixed_dt_extra_mem='yes'
enable_sup_handling='yes'
enable_amd_frame_tweaks='no'
enable_amd_offload='no'
enable_memkind='' # The default memkind value is determined later on.
enable_trsm_preinversion='yes'
force_version='no'
Expand Down Expand Up @@ -2687,6 +2697,12 @@ main()
disable-amd-frame-tweaks)
enable_amd_frame_tweaks='no'
;;
enable-amd-offload)
enable_amd_offload='yes'
;;
disable-amd-offload)
enable_amd_offload='no'
;;
with-memkind)
enable_memkind='yes'
;;
Expand Down Expand Up @@ -3616,6 +3632,29 @@ main()
echo "${script_name}: AMD-specific framework files will not be considered."
fi

# Check whether anything should be offloaded to AMD accelerators.
# enable_amd_offload_01 is the 0/1 form of the setting; it is only set to 1
# once an accelerator has actually been detected. If offloading was requested
# but no suitable device is found, enable_amd_offload is reset to 'no' so the
# yes/no and 0/1 forms stay in agreement.
enable_amd_offload_01=0
if [ "x${enable_amd_offload}" = "xyes" ]; then
	echo "${script_name}: Offloading to AMD accelerators will be considered."
	echo "${script_name}: checking for ROCm installation and availability."

	# Make sure there's a ROCm installation present:
	# use rocm_agent_enumerator to see if there's a gfx != gfx000.
	gfxs=`rocm_agent_enumerator`
	if [ -z "$gfxs" ]; then
		# Empty output: either the tool is missing or it listed no agents.
		echo "${script_name}: rocm_agent_enumerator returns no agents."
		enable_amd_offload='no'
	elif [[ "$gfxs" =~ "gfx9" ]] || [[ "$gfxs" =~ "gfx10" ]]; then
		# At least one gfx9xx / gfx10xx agent was listed.
		echo "${script_name}: found AMD accelerator(s)."
		enable_amd_offload_01=1
	else
		# Non-empty output without a recognized agent; echo back what the
		# enumerator actually printed so the user can diagnose it.
		echo "${script_name}: Illegal rocm_agent_enumerator output. $gfxs"
		enable_amd_offload='no'
	fi
else
	echo "${script_name}: Offloading to AMD accelerators will not be considered."
fi

# Check if addons were given.
if [ -n "${addon_flag}" ]; then

Expand Down Expand Up @@ -3871,6 +3910,7 @@ main()
| sed -e "s/@enable_blas@/${enable_blas}/g" \
| sed -e "s/@enable_cblas@/${enable_cblas}/g" \
| sed -e "s/@enable_amd_frame_tweaks@/${enable_amd_frame_tweaks}/g" \
| sed -e "s/@enable_amd_offload@/${enable_amd_offload}/g" \
| sed -e "s/@enable_memkind@/${enable_memkind}/g" \
| sed -e "s/@pragma_omp_simd@/${pragma_omp_simd}/g" \
| sed -e "s/@addon_list@/${addon_list}/g" \
Expand Down Expand Up @@ -3910,6 +3950,7 @@ main()
| sed -e "s/@enable_sandbox@/${enable_sandbox_01}/g" \
| sed -e "s/@enable_shared@/${enable_shared_01}/g" \
| sed -e "s/@complex_return_intel@/${complex_return_intel01}/g" \
| sed -e "s/@enable_amd_offload@/${enable_amd_offload_01}/g" \
> "${bli_config_h_out_path}"

# -- Instantiate bli_addon.h file from template ----------------------------
Expand Down
18 changes: 18 additions & 0 deletions frame/3/bli_l3_oapi_ex.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@
*/

#include "blis.h"
#ifdef BLIS_ENABLE_AMD_OFFLOAD
#include "../base/bli_offloader.h"
#endif

//
// Define object-based interfaces (expert).
Expand Down Expand Up @@ -73,6 +76,21 @@ void PASTEMAC(gemm,BLIS_OAPI_EX_SUF)
return;
}
}
#ifdef BLIS_ENABLE_AMD_OFFLOAD
// Check whether we should offload. Since attempting to offload and failing
// incurs a non-trivial cost, we only want to fail and fall through in rare
// cases.
const bool do_offload = bli_do_offload_gemmex( alpha, a, b, beta, c);
if ( do_offload )
{
// attempts to offload
const err_t result = bli_offload_gemmex( alpha, a, b, beta, c);
if ( result == BLIS_SUCCESS )
{
return;
}
}
#endif

// Initialize a local runtime with global settings if necessary. Note
// that in the case that a runtime is passed in, we make a local copy.
Expand Down
13 changes: 12 additions & 1 deletion frame/base/bli_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@
*/

#include "blis.h"
#ifdef BLIS_ENABLE_AMD_OFFLOAD
#include "bli_offloader.h"
#endif

// -----------------------------------------------------------------------------

Expand Down Expand Up @@ -87,7 +90,11 @@ int bli_init_apis( void )
bli_pack_init();
bli_memsys_init();

return 0;
#ifdef BLIS_ENABLE_AMD_OFFLOAD
bli_offloader_init();
#endif

return 0;
}

int bli_finalize_apis( void )
Expand All @@ -99,6 +106,10 @@ int bli_finalize_apis( void )
bli_ind_finalize();
bli_gks_finalize();

#ifdef BLIS_ENABLE_AMD_OFFLOAD
bli_offloader_finalize();
#endif

return 0;
}

Loading