30 std::vector<int8_t>& shaped_w)
const {
31 const int num_out = w.
dim1();
32 const int num_in = w.
dim2() - 1;
37 shaped_w.resize((rounded_num_in + 1) * rounded_num_out, 0);
45 int num_outputs_per_register_set =
48 while (output + num_outputs_per_register_set <= rounded_num_out) {
53 for (
int j = 0; j < num_outputs_per_register_set; ++j) {
58 if (output + j < num_out && input + i < num_in)
59 weight = w(output + j, input + i);
60 shaped_w[shaped_index++] = weight;
65 for (
int j = 0; j < num_outputs_per_register_set; ++j) {
67 if (output + j < num_out) weight = w(output + j, num_in);
68 shaped_w[shaped_index++] = weight;
70 output += num_outputs_per_register_set;
81 const int8_t* u,
double* v) {
82 int num_out = w.
dim1();
83 int num_in = w.
dim2() - 1;
85 for (
int i = 0; i < num_out; ++i) {
86 const int8_t* wi = w[i];
88 for (
int j = 0; j < num_in; ++j) total += wi[j] * u[j];
90 v[i] = (
static_cast<double>(total) / INT8_MAX + wi[num_in]) * scales[i];
int num_inputs_per_group_
static void MatrixDotVector(const GENERIC_2D_ARRAY< int8_t > &w, const GenericVector< double > &scales, const int8_t *u, double *v)
int max_output_registers_
int RoundOutputs(int size) const
int num_outputs_per_register_
static int Roundup(int input, int factor)
static const IntSimdMatrix * intSimdMatrix
void Init(const GENERIC_2D_ARRAY< int8_t > &w, std::vector< int8_t > &shaped_w) const