On Thu, Aug 25, 2022 at 05:38:58PM +0200, Tobias Burnus wrote:
> On 25.08.22 16:54, Tobias Burnus wrote:
>
> The attached patch prepares for reverse-offload device->host
> function-address lookup by requesting (if needed) the on-device address.
>
>
> This patch adds the actual implementation for GCN. A variant would be
> to only generate .offload_func_table inside mkoffload when
> OMP_REQUIRES_REVERSE_OFFLOAD has been requested.
>
> This is currently effectively a no-op because, with the [1/3] patch, NULL
> is always passed and GOMP_OFFLOAD_get_num_devices returns <= 0 as soon as
> 'omp requires reverse_offload' has been specified.
>
> OK for mainline?
>
> Tobias
>
>
> -----------------
> Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
> GCN: libgomp+mkoffload.cc: Prepare for reverse offload fn lookup
>
> Add support to GCN for reverse lookup of function name to prepare for
> 'omp target device(ancestor:1)'.
>
> gcc/ChangeLog:
>
> * config/gcn/mkoffload.cc (process_asm): Create .offload_func_table,
> similar to pre-existing .offload_var_table.
>
> libgomp/ChangeLog:
>
> * plugin/plugin-gcn.c (GOMP_OFFLOAD_load_image): Read
> .offload_func_table to populate rev_fn_table when requested.
Ok.
Jakub
GCN: libgomp+mkoffload.cc: Prepare for reverse offload fn lookup
Add support to GCN for reverse lookup of function name to prepare for
'omp target device(ancestor:1)'.
gcc/ChangeLog:
* config/gcn/mkoffload.cc (process_asm): Create .offload_func_table,
similar to pre-existing .offload_var_table.
libgomp/ChangeLog:
* plugin/plugin-gcn.c (GOMP_OFFLOAD_load_image): Read
.offload_func_table to populate rev_fn_table when requested.
gcc/config/gcn/mkoffload.cc | 11 ++++++++++-
libgomp/plugin/plugin-gcn.c | 26 +++++++++++++++++++++++++-
2 files changed, 35 insertions(+), 2 deletions(-)
@@ -537,63 +537,72 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
case IN_VARS:
{
char *varname;
unsigned varsize;
if (sscanf (buf, " .8byte %ms\n", &varname))
{
fputs (buf, out);
fgets (buf, sizeof (buf), in);
if (!sscanf (buf, " .8byte %u\n", &varsize))
abort ();
var_count++;
}
break;
}
case IN_FUNCS:
{
char *funcname;
if (sscanf (buf, "\t.8byte\t%ms\n", &funcname))
{
+ fputs (buf, out);
obstack_ptr_grow (&fns_os, funcname);
fn_count++;
continue;
}
break;
}
}
char dummy;
if (sscanf (buf, " .section .gnu.offload_vars%c", &dummy) > 0)
{
state = IN_VARS;
/* Add a global symbol to allow plugin-gcn.c to locate the table
at runtime. It can't use the "offload_var_table.N" emitted by
the compiler because a) they're not global, and b) there's one
for each input file combined into the binary. */
fputs (buf, out);
fputs ("\t.global .offload_var_table\n"
"\t.type .offload_var_table, @object\n"
".offload_var_table:\n",
out);
}
else if (sscanf (buf, " .section .gnu.offload_funcs%c", &dummy) > 0)
- state = IN_FUNCS;
+ {
+ state = IN_FUNCS;
+ /* Likewise for .gnu.offload_vars; used for reverse offload. */
+ fputs (buf, out);
+ fputs ("\t.global .offload_func_table\n"
+ "\t.type .offload_func_table, @object\n"
+ ".offload_func_table:\n",
+ out);
+ }
else if (sscanf (buf, " .amdgpu_metadata%c", &dummy) > 0)
{
state = IN_METADATA;
regcount.kernel_name = NULL;
regcount.sgpr_count = regcount.vgpr_count = -1;
}
else if (sscanf (buf, " .section %c", &dummy) > 0
|| sscanf (buf, " .text%c", &dummy) > 0
|| sscanf (buf, " .bss%c", &dummy) > 0
|| sscanf (buf, " .data%c", &dummy) > 0
|| sscanf (buf, " .ident %c", &dummy) > 0)
state = IN_CODE;
else if (sscanf (buf, " .end_amdgpu_metadata%c", &dummy) > 0)
{
state = IN_CODE;
gcc_assert (regcount.kernel_name != NULL
&& regcount.sgpr_count >= 0
&& regcount.vgpr_count >= 0);
obstack_grow (&regcounts_os, &regcount, sizeof (regcount));
@@ -3353,7 +3353,7 @@ GOMP_OFFLOAD_init_device (int n)
int
GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
struct addr_pair **target_table,
- uint64_t **rev_fn_table __attribute__((unused)))
+ uint64_t **rev_fn_table)
{
if (GOMP_VERSION_DEV (version) != GOMP_VERSION_GCN)
{
@@ -3520,6 +3520,30 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
if (module->fini_array_func)
kernel_count--;
+ if (rev_fn_table != NULL && kernel_count == 0)
+ *rev_fn_table = NULL;
+ else if (rev_fn_table != NULL)
+ {
+ hsa_status_t status;
+ hsa_executable_symbol_t var_symbol;
+ status = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL,
+ ".offload_func_table",
+ agent->id, 0, &var_symbol);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not find symbol for variable in the code object",
+ status);
+ uint64_t fn_table_addr;
+ status = hsa_fns.hsa_executable_symbol_get_info_fn
+ (var_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS,
+ &fn_table_addr);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not extract a variable from its symbol", status);
+ *rev_fn_table = GOMP_PLUGIN_malloc (kernel_count * sizeof (uint64_t));
+ GOMP_OFFLOAD_dev2host (agent->device_id, *rev_fn_table,
+ (void*) fn_table_addr,
+ kernel_count * sizeof (uint64_t));
+ }
+
return kernel_count + var_count + other_count;
}