// Copyright 2022 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/codegen/machine-type.h"
#include "src/compiler/common-operator.h"
#include "src/compiler/machine-graph.h"
#include "src/compiler/machine-operator.h"
#include "src/compiler/node-properties.h"
#include "src/compiler/node.h"
#include "src/compiler/revectorizer.h"
#include "src/compiler/wasm-compiler.h"
#include "src/wasm/wasm-module.h"
#include "test/unittests/compiler/graph-unittest.h"
#include "test/unittests/compiler/node-test-utils.h"
#include "testing/gmock-support.h"

using testing::AllOf;
using testing::Capture;
using testing::CaptureEq;

namespace v8 {
namespace internal {
namespace compiler {

// Test fixture for the revectorizer tests. It owns a Graph, the common and
// machine operator builders, and a MachineGraph that points at all three; each
// of them is constructed with the fixture's zone.
class RevecTest : public TestWithIsolateAndZone {
 public:
  RevecTest() : TestWithIsolateAndZone(kCompressGraphZone) {}

  // Accessors returning pointers to the fixture-owned objects.
  MachineGraph* mcgraph() { return &mcgraph_; }
  MachineOperatorBuilder* machine() { return &machine_; }
  CommonOperatorBuilder* common() { return &common_; }
  Graph* graph() { return &graph_; }

 private:
  // Members are initialized in declaration order. mcgraph_ stores the
  // addresses of the three members above it, so it has to stay last.
  Graph graph_{zone()};
  CommonOperatorBuilder common_{zone()};
  // Word64 machine with every optional operator enabled.
  MachineOperatorBuilder machine_{
      zone(), MachineRepresentation::kWord64,
      MachineOperatorBuilder::Flag::kAllOptionalOps};
  MachineGraph mcgraph_{&graph_, &common_, &machine_};
};

// Create a graph which adds two 256-bit vectors (a, b) and stores the result
// in c:
// simd128 *a,*b,*c;
// *c = *a + *b;
// *(c+1) = *(a+1) + *(b+1);
// In revectorization, two simd128 nodes can be combined into one simd256 node:
// simd256 *d, *e, *f;
// *f = *d + *e;
TEST_F(RevecTest, F32x8Add) {
  // Nothing is checked on hosts without AVX2.
  if (!CpuFeatures::IsSupported(AVX2)) return;

  Node* const entry = graph()->NewNode(common()->Start(5));
  graph()->SetStart(entry);

  Node* const ret_val = graph()->NewNode(common()->Int32Constant(0));
  // Byte distance between the two 128-bit halves of each 256-bit vector.
  Node* const half_stride = graph()->NewNode(common()->Int64Constant(16));
  // Offset of the memory start field in the WASM instance object.
  Node* const mem_start_offset =
      graph()->NewNode(common()->Int64Constant(23));

  // Every parameter node hangs off the start node.
  const auto make_param = [&](int index) {
    return graph()->NewNode(common()->Parameter(index), entry);
  };
  Node* const instance = make_param(0);
  Node* const a_index = make_param(1);
  Node* const b_index = make_param(2);
  Node* const c_index = make_param(3);

  const StoreRepresentation simd128_store(MachineRepresentation::kSimd128,
                                          WriteBarrierKind::kNoWriteBarrier);
  const LoadRepresentation simd128_load(MachineType::Simd128());

  Node* const mem_start =
      graph()->NewNode(machine()->Load(MachineType::Int64()), instance,
                       mem_start_offset, entry, entry);

  // Three separate "mem_start + 16" nodes: one per access to an upper half.
  const auto upper_half_address = [&]() {
    return graph()->NewNode(machine()->Int64Add(), mem_start, half_stride);
  };
  Node* const a_hi_addr = upper_half_address();
  Node* const b_hi_addr = upper_half_address();
  Node* const c_hi_addr = upper_half_address();

  // Protected 128-bit load; control is always the start node.
  const auto load_simd128 = [&](Node* base, Node* index, Node* effect) {
    return graph()->NewNode(machine()->ProtectedLoad(simd128_load), base,
                            index, effect, entry);
  };
  // Effect chain: mem_start -> a_lo -> a_hi -> b_lo -> b_hi.
  Node* const a_lo = load_simd128(mem_start, a_index, mem_start);
  Node* const a_hi = load_simd128(a_hi_addr, a_index, a_lo);
  Node* const b_lo = load_simd128(mem_start, b_index, a_hi);
  Node* const b_hi = load_simd128(b_hi_addr, b_index, b_lo);

  Node* const sum_lo = graph()->NewNode(machine()->F32x4Add(), a_lo, b_lo);
  Node* const sum_hi = graph()->NewNode(machine()->F32x4Add(), a_hi, b_hi);

  // The two stores continue the effect chain after the last load.
  Node* const store_lo =
      graph()->NewNode(machine()->Store(simd128_store), mem_start, c_index,
                       sum_lo, b_hi, entry);
  Node* const store_hi =
      graph()->NewNode(machine()->Store(simd128_store), c_hi_addr, c_index,
                       sum_hi, store_lo, entry);

  Node* const ret =
      graph()->NewNode(common()->Return(0), ret_val, store_hi, entry);
  Node* const end = graph()->NewNode(common()->End(1), ret);
  graph()->SetEnd(end);

  // Register both 128-bit stores with the graph and flag it as using SIMD.
  graph()->RecordSimdStore(store_lo);
  graph()->RecordSimdStore(store_hi);
  graph()->SetSimd(true);

  // The graph must be accepted by the revectorizer.
  Revectorizer revec(zone(), graph(), mcgraph());
  EXPECT_TRUE(revec.TryRevectorize(nullptr));

  // Input 1 of the return node was the second 128-bit store when the graph was
  // built; after revectorization it has to be a 256-bit store.
  Node* const merged_store = ret->InputAt(1);
  EXPECT_EQ(StoreRepresentationOf(merged_store->op()).representation(),
            MachineRepresentation::kSimd256);
}

// Create a graph which multiplies an F32x8 vector with the first element of
// vector b and stores the result to an F32x8 vector c:
//   float *a, *b, *c;
//   c[0123] = a[0123] * b[0000];
//   c[4567] = a[4567] * b[0000];
//
// After the revectorization phase, two consecutive 128-bit loads and multiplies
// can be coalesced using 256-bit vectors:
//   c[01234567] = a[01234567] * b[00000000];
TEST_F(RevecTest, F32x8Mul) {
  // Nothing is checked on hosts without AVX2.
  if (!CpuFeatures::IsSupported(AVX2)) return;

  Node* const entry = graph()->NewNode(common()->Start(4));
  graph()->SetStart(entry);

  Node* const ret_val = graph()->NewNode(common()->Int32Constant(0));
  // Byte distance between the two 128-bit halves of a 256-bit vector.
  Node* const half_stride = graph()->NewNode(common()->Int64Constant(16));
  Node* const base_offset = graph()->NewNode(common()->Int64Constant(23));

  // Every parameter node hangs off the start node.
  const auto make_param = [&](int index) {
    return graph()->NewNode(common()->Parameter(index), entry);
  };
  // Parameter 0: Wasm array base address.
  Node* const base_param = make_param(0);
  // Parameter 1: index used by the plain loads of a.
  Node* const a_index = make_param(1);
  // Parameter 2: index used by the LoadTransform (splat) of b.
  Node* const b_index = make_param(2);
  // Parameter 3: index used by the stores to c.
  Node* const c_index = make_param(3);

  const LoadRepresentation simd128_load(MachineType::Simd128());
  const StoreRepresentation simd128_store(MachineRepresentation::kSimd128,
                                          WriteBarrierKind::kNoWriteBarrier);

  Node* const mem_base =
      graph()->NewNode(machine()->Load(MachineType::Int64()), base_param,
                       base_offset, entry, entry);

  // Two separate "mem_base + 16" nodes: one for the load, one for the store.
  const auto upper_half_address = [&]() {
    return graph()->NewNode(machine()->Int64Add(), mem_base, half_stride);
  };
  Node* const a_hi_addr = upper_half_address();
  Node* const c_hi_addr = upper_half_address();

  // Effect chain: mem_base -> a_lo -> a_hi -> b_splat.
  Node* const a_lo =
      graph()->NewNode(machine()->ProtectedLoad(simd128_load), mem_base,
                       a_index, mem_base, entry);
  Node* const a_hi =
      graph()->NewNode(machine()->ProtectedLoad(simd128_load), a_hi_addr,
                       a_index, a_lo, entry);
  // A single splat load shared by both multiplies.
  Node* const b_splat = graph()->NewNode(
      machine()->LoadTransform(MemoryAccessKind::kProtected,
                               LoadTransformation::kS128Load32Splat),
      mem_base, b_index, a_hi, entry);

  Node* const product_lo =
      graph()->NewNode(machine()->F32x4Mul(), a_lo, b_splat);
  Node* const product_hi =
      graph()->NewNode(machine()->F32x4Mul(), a_hi, b_splat);

  // The two stores continue the effect chain after the splat load.
  Node* const store_lo =
      graph()->NewNode(machine()->Store(simd128_store), mem_base, c_index,
                       product_lo, b_splat, entry);
  Node* const store_hi =
      graph()->NewNode(machine()->Store(simd128_store), c_hi_addr, c_index,
                       product_hi, store_lo, entry);

  Node* const ret =
      graph()->NewNode(common()->Return(0), ret_val, store_hi, entry);
  Node* const end = graph()->NewNode(common()->End(1), ret);
  graph()->SetEnd(end);

  // Register both 128-bit stores with the graph and flag it as using SIMD.
  graph()->RecordSimdStore(store_lo);
  graph()->RecordSimdStore(store_hi);
  graph()->SetSimd(true);

  Revectorizer revec(zone(), graph(), mcgraph());
  EXPECT_TRUE(revec.TryRevectorize(nullptr));

  // Input 1 of the return node was the second 128-bit store when the graph was
  // built; after revectorization it has to be a 256-bit store.
  Node* const merged_store = ret->InputAt(1);
  EXPECT_EQ(StoreRepresentationOf(merged_store->op()).representation(),
            MachineRepresentation::kSimd256);
}

// Create a graph with a load chain that cannot be packed due to an effect
// dependency:
//   [Load4] -> [Load3] -> [Load2] -> [Irrelevant Load] -> [Load1]
//
// After reordering, no effect dependency will be broken so the graph can be
// revectorized:
//   [Load4] -> [Load3] -> [Load2] -> [Load1] -> [Irrelevant Load]
TEST_F(RevecTest, ReorderLoadChain) {
  // Nothing is checked on hosts without AVX2.
  if (!CpuFeatures::IsSupported(AVX2)) return;

  Node* const entry = graph()->NewNode(common()->Start(5));
  graph()->SetStart(entry);

  Node* const ret_val = graph()->NewNode(common()->Int32Constant(0));
  // Byte distance between the two 128-bit halves of each 256-bit vector.
  Node* const half_stride = graph()->NewNode(common()->Int64Constant(16));
  // Offset of the memory start field in the WASM instance object.
  Node* const mem_start_offset =
      graph()->NewNode(common()->Int64Constant(23));

  // Every parameter node hangs off the start node.
  const auto make_param = [&](int index) {
    return graph()->NewNode(common()->Parameter(index), entry);
  };
  Node* const instance = make_param(0);
  Node* const a_index = make_param(1);
  Node* const b_index = make_param(2);
  Node* const c_index = make_param(3);

  const StoreRepresentation simd128_store(MachineRepresentation::kSimd128,
                                          WriteBarrierKind::kNoWriteBarrier);
  const LoadRepresentation simd128_load(MachineType::Simd128());

  Node* const mem_start =
      graph()->NewNode(machine()->Load(MachineType::Int64()), instance,
                       mem_start_offset, entry, entry);

  // Three separate "mem_start + 16" nodes: one per access to an upper half.
  const auto upper_half_address = [&]() {
    return graph()->NewNode(machine()->Int64Add(), mem_start, half_stride);
  };
  Node* const a_hi_addr = upper_half_address();
  Node* const b_hi_addr = upper_half_address();
  Node* const c_hi_addr = upper_half_address();

  // Protected 128-bit load; control is always the start node.
  const auto load_simd128 = [&](Node* base, Node* index, Node* effect) {
    return graph()->NewNode(machine()->ProtectedLoad(simd128_load), base,
                            index, effect, entry);
  };
  // Effect chain: mem_start -> a_lo -> unrelated -> a_hi -> b_lo -> b_hi.
  // The extra load sits between the two halves of a on the effect chain and
  // its value is not consumed by any other node.
  Node* const a_lo = load_simd128(mem_start, a_index, mem_start);
  Node* const unrelated = load_simd128(a_hi_addr, a_index, a_lo);
  Node* const a_hi = load_simd128(a_hi_addr, a_index, unrelated);
  Node* const b_lo = load_simd128(mem_start, b_index, a_hi);
  Node* const b_hi = load_simd128(b_hi_addr, b_index, b_lo);

  Node* const sum_lo = graph()->NewNode(machine()->F32x4Add(), a_lo, b_lo);
  Node* const sum_hi = graph()->NewNode(machine()->F32x4Add(), a_hi, b_hi);

  // The two stores continue the effect chain after the last load.
  Node* const store_lo =
      graph()->NewNode(machine()->Store(simd128_store), mem_start, c_index,
                       sum_lo, b_hi, entry);
  Node* const store_hi =
      graph()->NewNode(machine()->Store(simd128_store), c_hi_addr, c_index,
                       sum_hi, store_lo, entry);

  Node* const ret =
      graph()->NewNode(common()->Return(0), ret_val, store_hi, entry);
  Node* const end = graph()->NewNode(common()->End(1), ret);
  graph()->SetEnd(end);

  // Register both 128-bit stores with the graph and flag it as using SIMD.
  graph()->RecordSimdStore(store_lo);
  graph()->RecordSimdStore(store_hi);
  graph()->SetSimd(true);

  // The graph must be accepted by the revectorizer despite the extra load.
  Revectorizer revec(zone(), graph(), mcgraph());
  EXPECT_TRUE(revec.TryRevectorize(nullptr));
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8