Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for c_next in the auxinfo_t struct. #632

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions frame/3/bli_l3_thrinfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
// change depending on BLIS_ENABLE_JRIR_SLAB / BLIS_ENABLE_JRIR_RR.
//
// Each macro below evaluates to ( base + step * inc ), i.e. the base address
// advanced by inc strides of length step, using the pointer arithmetic of
// whatever type the base pointer has. Every argument is parenthesized so
// that compound expressions passed as arguments (e.g. "i + 1") expand with
// the intended precedence.
#define bli_gemm_get_next_a_upanel( a1, step, inc ) ( (a1) + (step) * (inc) )
#define bli_gemm_get_next_b_upanel( b1, step, inc ) ( (b1) + (step) * (inc) )
// The same address computation applied to C.
// NOTE(review): presumably "utilem"/"utilen" mean the next micro-tile of C
// along the m and n dimensions, respectively -- confirm against callers.
#define bli_gemm_get_next_c_utilem( c1, step, inc ) ( (c1) + (step) * (inc) )
#define bli_gemm_get_next_c_utilen( c1, step, inc ) ( (c1) + (step) * (inc) )

// gemmt

Expand Down
7 changes: 7 additions & 0 deletions frame/3/gemm/bli_gemm_ker_var2.c
Original file line number Diff line number Diff line change
Expand Up @@ -293,18 +293,25 @@ void bli_gemm_ker_var2

// Compute the addresses of the next panels of A and B, and of the next
// micro-tile of C.
const char* a2 = bli_gemm_get_next_a_upanel( a1, rstep_a, ir_inc );
const char* c2 = bli_gemm_get_next_c_utilem( c11, rstep_c, ir_inc );
if ( bli_is_last_iter( i, ir_end, ir_tid, ir_nt ) )
{
a2 = a_cast;
b2 = bli_gemm_get_next_b_upanel( b1, cstep_b, jr_inc );
c2 = bli_gemm_get_next_c_utilen( c1, cstep_c, jr_inc );
if ( bli_is_last_iter( j, jr_end, jr_tid, jr_nt ) )
{
b2 = b_cast;
c2 = bli_gemm_get_next_c_utilem( c_cast, rs_c, m );
c2 = bli_gemm_get_next_c_utilem( c2, rstep_c, ir_inc );
}
}

// Save addresses of the next panels of A and B, and of the next
// micro-tile of C, to the auxinfo_t object.
bli_auxinfo_set_next_a( a2, &aux );
bli_auxinfo_set_next_b( b2, &aux );
bli_auxinfo_set_next_c( c2, &aux );

// Edge case handling now occurs within the microkernel itself, but
// we must still explicitly accumulate to a temporary microtile in
Expand Down
8 changes: 8 additions & 0 deletions frame/base/bli_auxinfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ BLIS_INLINE const void* bli_auxinfo_next_b( const auxinfo_t* ai )
{
return ai->b_next;
}
// Query the c_next field of an auxinfo_t: the address recorded for the
// micro-tile of C that the next micro-kernel call will use.
BLIS_INLINE const void* bli_auxinfo_next_c( const auxinfo_t* ai )
{
	const void* c_next = ( *ai ).c_next;

	return c_next;
}

BLIS_INLINE inc_t bli_auxinfo_is_a( const auxinfo_t* ai )
{
Expand Down Expand Up @@ -103,6 +107,10 @@ BLIS_INLINE void bli_auxinfo_set_next_b( const void* p, auxinfo_t* ai )
{
ai->b_next = p;
}
// Record p in the c_next field of the auxinfo_t pointed to by ai. The
// pointer is stored as-is; nothing is dereferenced or copied.
BLIS_INLINE void bli_auxinfo_set_next_c( const void* p, auxinfo_t* ai )
{
	( *ai ).c_next = p;
}
BLIS_INLINE void bli_auxinfo_set_next_ab( const void* ap, const void* bp, auxinfo_t* ai )
{
ai->a_next = ap;
Expand Down
5 changes: 3 additions & 2 deletions frame/include/bli_type_defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -1113,10 +1113,11 @@ typedef struct
pack_t schema_a;
pack_t schema_b;

// Pointers to the micro-panels of A and B which will be used by the
// next call to the micro-kernel.
// Pointers to the micro-panels of A and B, and micro-tile of C, which
// will be used by the next call to the micro-kernel.
const void* a_next;
const void* b_next;
const void* c_next;

// The imaginary strides of A and B.
inc_t is_a;
Expand Down