-
Notifications
You must be signed in to change notification settings - Fork 5.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
8332163: C2 SuperWord: refactor PacksetGraph and SuperWord::output into VTransformGraph #19261
base: master
Are you sure you want to change the base?
Conversation
👋 Welcome back epeter! A progress list of the required criteria for merging this PR into |
❗ This change is not yet ready to be integrated. |
} else if (opc == Op_SqrtF || opc == Op_SqrtD || | ||
opc == Op_AbsF || opc == Op_AbsD || | ||
opc == Op_AbsI || opc == Op_AbsL || | ||
opc == Op_NegF || opc == Op_NegD || | ||
opc == Op_RoundF || opc == Op_RoundD || | ||
opc == Op_ReverseBytesI || opc == Op_ReverseBytesL || | ||
opc == Op_ReverseBytesUS || opc == Op_ReverseBytesS || | ||
opc == Op_ReverseI || opc == Op_ReverseL || | ||
opc == Op_PopCountI || opc == Op_CountLeadingZerosI || | ||
opc == Op_CountTrailingZerosI) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: condition captured by VTransformElementWiseVectorNode::is_unary_element_wise_opcode
.
assert(n->req() == 2, "only one input expected"); | ||
Node* in = vector_opd(p, 1); | ||
vn = VectorNode::make(opc, in, nullptr, vlen, velt_basic_type(n)); | ||
vlen_in_bytes = vn->as_Vector()->length_in_bytes(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: see VTransformElementWiseVectorNode::apply
.
} else if (requires_long_to_int_conversion(opc)) { | ||
// Java API for Long.bitCount/numberOfLeadingZeros/numberOfTrailingZeros | ||
// returns int type, but Vector API for them returns long type. To unify | ||
// the implementation in backend, superword splits the vector implementation | ||
// for Java API into an execution node with long type plus another node | ||
// converting long to int. | ||
assert(n->req() == 2, "only one input expected"); | ||
Node* in = vector_opd(p, 1); | ||
Node* longval = VectorNode::make(opc, in, nullptr, vlen, T_LONG); | ||
phase()->register_new_node_with_ctrl_of(longval, first); | ||
vn = VectorCastNode::make(Op_VectorCastL2X, longval, T_INT, vlen); | ||
vlen_in_bytes = vn->as_Vector()->length_in_bytes(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: see VTransformElementWiseVectorNode::apply
.
} else if (VectorNode::is_convert_opcode(opc)) { | ||
assert(n->req() == 2, "only one input expected"); | ||
BasicType bt = velt_basic_type(n); | ||
Node* in = vector_opd(p, 1); | ||
int vopc = VectorCastNode::opcode(opc, in->bottom_type()->is_vect()->element_basic_type()); | ||
vn = VectorCastNode::make(vopc, in, bt, vlen); | ||
vlen_in_bytes = vn->as_Vector()->length_in_bytes(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: see VTransformElementWiseVectorNode::apply
.
} else if (opc == Op_FmaD || opc == Op_FmaF) { | ||
// Promote operands to vector | ||
Node* in1 = vector_opd(p, 1); | ||
Node* in2 = vector_opd(p, 2); | ||
Node* in3 = vector_opd(p, 3); | ||
vn = VectorNode::make(opc, in1, in2, in3, vlen, velt_basic_type(n)); | ||
vlen_in_bytes = vn->as_Vector()->length_in_bytes(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: see VTransformElementWiseVectorNode::apply
.
#ifdef ASSERT | ||
// Mark Load/Store Vector for alignment verification | ||
if (VerifyAlignVector) { | ||
if (vn->Opcode() == Op_LoadVector) { | ||
vn->as_LoadVector()->set_must_verify_alignment(); | ||
} else if (vn->Opcode() == Op_StoreVector) { | ||
vn->as_StoreVector()->set_must_verify_alignment(); | ||
} | ||
} | ||
#endif |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: directly moved to VTransform(Load/Store)VectorNode::apply
.
phase()->register_new_node_with_ctrl_of(vn, first); | ||
for (uint j = 0; j < p->size(); j++) { | ||
Node* pm = p->at(j); | ||
igvn().replace_node(pm, vn); | ||
} | ||
igvn()._worklist.push(vn); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: see VTransformVectorNode::register_new_node_from_vectorization_and_replace_scalar_nodes
.
if (vlen > max_vlen) { | ||
max_vlen = vlen; | ||
} | ||
if (vlen_in_bytes > max_vlen_in_bytes) { | ||
max_vlen_in_bytes = vlen_in_bytes; | ||
} | ||
VectorNode::trace_new_vector(vn, "SuperWord"); | ||
} | ||
}//for (int i = 0; i < body().length(); i++) | ||
|
||
if (max_vlen_in_bytes > C->max_vector_size()) { | ||
C->set_max_vector_size(max_vlen_in_bytes); | ||
} | ||
if (max_vlen_in_bytes > 0) { | ||
cl->mark_loop_vectorized(); | ||
} | ||
|
||
if (SuperWordLoopUnrollAnalysis) { | ||
if (cl->has_passed_slp()) { | ||
uint slp_max_unroll_factor = cl->slp_max_unroll(); | ||
if (slp_max_unroll_factor == max_vlen) { | ||
#ifndef PRODUCT | ||
if (TraceSuperWordLoopUnrollAnalysis) { | ||
tty->print_cr("vector loop(unroll=%d, len=%d)\n", max_vlen, max_vlen_in_bytes*BitsPerByte); | ||
} | ||
#endif | ||
// For atomic unrolled loops which are vector mapped, instigate more unrolling | ||
cl->set_notpassed_slp(); | ||
// if vector resources are limited, do not allow additional unrolling | ||
if (Matcher::float_pressure_limit() > 8) { | ||
C->set_major_progress(); | ||
cl->mark_do_unroll_only(); | ||
} | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: see VTransformGraph::apply_vectorization
.
|
||
//------------------------------vector_opd--------------------------- | ||
// Create a vector operand for the nodes in pack p for operand: in(opd_idx) | ||
Node* SuperWord::vector_opd(Node_List* p, int opd_idx) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note:
Rather than only finding input vectors when we actually apply the vectorization, we now capture the input-relationship already for the VTransformGraph
. We use SuperWordVTransformBuilder::find_input_for_vector
for this now.
When we actually VTransform...Node::apply
, then we can call VTransformNode::find_transformed_input
to find the already transformed/applied inputs.
uint vlen = p->size(); | ||
Node* opd = p0->in(opd_idx); | ||
CountedLoopNode *cl = lpt()->_head->as_CountedLoop(); | ||
bool have_same_inputs = same_inputs(p, opd_idx); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: replaced with Node* unique = _packset.isa_unique_input_or_null(pack, j);
.
if (opd->is_Vector() || opd->is_LoadVector()) { | ||
if (opd_idx == 2 && VectorNode::is_shift(p0)) { | ||
assert(false, "shift's count can't be vector"); | ||
return nullptr; | ||
} | ||
return opd; // input is matching vector | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: since here the inputs were already transformed, we got input VectorNode
or LoadVectorNode
, for all members of the pack this same vector node. But now that we model this with VTransform
, the input is also a pack. We handle this in SuperWordVTransformBuilder::find_input_for_vector
with:
Node_List* pack_in = _packset.isa_pack_input_or_null(pack, j);
// Insert pack operation | ||
BasicType bt = velt_basic_type(p0); | ||
PackNode* pk = PackNode::make(opd, vlen, bt); | ||
DEBUG_ONLY( const BasicType opd_bt = opd->bottom_type()->basic_type(); ) | ||
|
||
for (uint i = 1; i < vlen; i++) { | ||
Node* pi = p->at(i); | ||
Node* in = pi->in(opd_idx); | ||
if (get_pack(in) != nullptr) { | ||
assert(false, "Should already have been unpacked"); | ||
return nullptr; | ||
} | ||
assert(opd_bt == in->bottom_type()->basic_type(), "all same type"); | ||
pk->add_opd(in); | ||
if (VectorNode::is_muladds2i(pi)) { | ||
Node* in2 = pi->in(opd_idx + 2); | ||
if (get_pack(in2) != nullptr) { | ||
assert(false, "Should already have been unpacked"); | ||
return nullptr; | ||
} | ||
assert(opd_bt == in2->bottom_type()->basic_type(), "all same type"); | ||
pk->add_opd(in2); | ||
} | ||
} | ||
phase()->register_new_node_with_ctrl_of(pk, opd); | ||
VectorNode::trace_new_vector(pk, "SuperWord"); | ||
return pk; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: this PackNode
code seems to be dead code, if you read SuperWord::profitable
then there are comments saying that we are not using PackNode
. I replaced it with a ShouldNotReachHere
in SuperWordVTransformBuilder::find_input_for_vector
.
@@ -3807,3 +3036,412 @@ bool SuperWord::same_origin_idx(Node* a, Node* b) const { | |||
bool SuperWord::same_generation(Node* a, Node* b) const { | |||
return a != nullptr && b != nullptr && _clone_map.same_gen(a->_idx, b->_idx); | |||
} | |||
|
|||
bool SuperWord::vtransform() const { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: most similar code used to be SuperWord::schedule()
. Call to graph.apply()
is roughly what SuperWord::output()
used to be.
return true; | ||
} | ||
|
||
void SuperWordVTransformBuilder::build_vtransform() { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: see PacksetGraph::build
} | ||
} | ||
|
||
void SuperWordVTransformBuilder::build_edges_for_vector_vtnodes(VectorSet& vtn_dependencies) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: case distinction based on SuperWord::output
.
} | ||
} | ||
|
||
void SuperWordVTransformBuilder::build_edges_for_scalar_vtnodes(VectorSet& vtn_dependencies) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: case distinction based on SuperWord::output
.
} | ||
|
||
// Create a vtnode for each pack. No in/out edges set yet. | ||
VTransformVectorNode* SuperWordVTransformBuilder::make_vtnode_for_pack(const Node_List* pack) const { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: case distinction based on SuperWord::output
.
vtn_dependencies.set(req->_idx); | ||
} | ||
|
||
VTransformNode* SuperWordVTransformBuilder::find_input_for_vector(int j, Node_List* pack) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: see SuperWord::vector_opd
.
} | ||
} | ||
|
||
void SuperWordVTransformBuilder::add_dependencies(VTransformNode* vtn, VectorSet& vtn_dependencies, Node* n) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: see PacksetGraph::build
for the old VLoopDependencyGraph::PredsIterator
usage.
} | ||
} | ||
|
||
Node* PackSet::isa_unique_input_or_null(const Node_List* pack, int j) const { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: see SuperWord::same_inputs
return pack_in; | ||
} | ||
|
||
VTransformBoolTest PackSet::get_bool_test(const Node_List* pack) const { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: see SuperWord::output
for Cmp / Bool / CMove
pattern.
}; | ||
|
||
// Facility class that builds a VTransformGraph from a SuperWord PackSet. | ||
class SuperWordVTransformBuilder : public StackObj { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: building the graph used to be done directly in PacksetGraph
. This is the translation from SuperWord
to VTransform
. In the future, we could have other vectorization approaches that construct a VTransform
with something else than SuperWord
.
// +--------+ | ||
// | ||
// We return "true" IFF we find no cycle, i.e. if the linearization succeeds. | ||
bool VTransformGraph::schedule() { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: see comments above PacksetGraph
, and PacksetGraph::schedule
. I now use reverse-post-order instead of topsort, because that does not require modifying the graph (topsort requires edge-removal). We also use reverse-post-order elsewhere in C2, and not topsort.
|
||
adjust_pre_loop_limit_to_align_main_loop_vectors(); | ||
apply_vectorization(); | ||
phase()->C->print_method(PHASE_AUTO_VECTORIZE3_AFTER_APPLY, 4, cl()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: now I moved all methods that modify the graph to this place, and also I renamed the print-method tags to be more generic and not SuperWord specific.
|
||
// We call "apply" on every VTransformNode, which replaces the packed scalar nodes | ||
// with vector nodes. | ||
void VTransformGraph::apply_vectorization() const { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: see the loop over all packs in SuperWord::output
, and also the cl
marking and SuperWordLoopUnrollAnalysis
is taken from there. That code may deserve some attention/refactoring in the future.
|
||
// Invoke callback on all memops, in the order of the schedule. | ||
template<typename Callback> | ||
void VTransformGraph::for_each_memop_in_schedule(Callback callback) const { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: needed to put this here in the hpp
so that the template
could be instanciated, not just for the vectorization.cpp
but also for superword.cpp
, where I have still 1 usage. I can probably move around some code in a future RFE to improve this, but for now this reduces the number of lines I need to let you review ;)
return false; | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: Comments used to be at use-site in superword code.
This is a rather large refactoring, and it was difficult to find anything more to split off into a separate RFE without making the total number of lines changed massively larger.
I added some extra tests for this in: #19558
I extracted some refactorings to: #19573
We used to have:
PacksetGraph
: this detects cycles introduces by packs, and schedules/reorders the memops.SuperWord::output
: creates VectorNodes directly from the PackSet.In my blog, I have published lots of ideas for SuperWord / AutoVectorization improvements:
https://eme64.github.io/blog/2023/11/03/C2-AutoVectorizer-Improvement-Ideas.html
Many ideas are based on the "VectorTransform IR": cost-model, if-conversion, direct widening of scalars to vectors, additional optimizations/features with shuffle/pack/extract, handling more reduction patterns, etc.
I now decided to name it
VTransform
, which is essencially a graphVtransformGraph
of nodesVTransformNodes
that resemble the C2Node
on purpose, because theVTransform
models the C2 graph after vectorization. We can now model the transformation from scalar-loop to vectorized-loop without modifying the C2 graph yet.The new code has these steps:
PackSet
fromSuperWord
, we create aVTransformGraph
withSuperWordVTransformBuilder
.VTransformGraph
, in future RFE's]VTransformGraph
, and check for cycles.VTransformGraph::apply
which lets each individualVTransformNode::apply
generate the new vectorized C2 nodes.Progress
Issue
Reviewing
Using
git
Checkout this PR locally:
$ git fetch https://git.openjdk.org/jdk.git pull/19261/head:pull/19261
$ git checkout pull/19261
Update a local copy of the PR:
$ git checkout pull/19261
$ git pull https://git.openjdk.org/jdk.git pull/19261/head
Using Skara CLI tools
Checkout this PR locally:
$ git pr checkout 19261
View PR using the GUI difftool:
$ git pr show -t 19261
Using diff file
Download this PR as a diff file:
https://git.openjdk.org/jdk/pull/19261.diff