forked from pytorch/executorch
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathXNNExecutor.h
More file actions
112 lines (93 loc) · 3.21 KB
/
XNNExecutor.h
File metadata and controls
112 lines (93 loc) · 3.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <executorch/backends/xnnpack/runtime/XNNStatus.h>
#include <executorch/backends/xnnpack/runtime/XNNWorkspace.h>
#include <executorch/backends/xnnpack/runtime/profiling/XNNProfiler.h>
#include <executorch/runtime/backend/interface.h>
#include <executorch/runtime/core/error.h>
#include <executorch/runtime/core/exec_aten/util/tensor_util.h>
#include <xnnpack.h>
#include <memory>
#include <vector>
namespace executorch {
namespace backends {
namespace xnnpack {
namespace delegate {
/**
 * XNNExecutor owns a compiled XNNPACK runtime (xnn_runtime_t) together with
 * the bookkeeping required to run it: the external input/output value ids,
 * the external-value bindings handed to XNNPACK at setup time, and the
 * shared workspace the runtime draws scratch memory from.
 *
 * Lifecycle: construct with a workspace, then initialize() with the runtime
 * and ids; per inference call prepare_args() -> forward() ->
 * resize_outputs() -> convert_outputs().
 */
class XNNExecutor {
 private:
  // Owning handle to the XNNPACK runtime; released via xnn_delete_runtime.
  std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> runtime_{
      nullptr,
      &xnn_delete_runtime};
  profiling::XNNProfiler profiler_;
  // External value ids, sorted by flatbuffer id_out (see initialize()).
  std::vector<uint32_t> input_ids_;
  std::vector<uint32_t> output_ids_;
  // External-value bindings passed to the XNNPACK runtime each execution.
  std::vector<xnn_external_value> externals_;
  // Names of packed weight blobs; empty when no weight cache is in use.
  std::vector<std::string> packed_data_names_;
  // Workspace shared across executors so scratch allocations can be reused.
  std::shared_ptr<XNNWorkspace> workspace_;

 public:
  // Takes the shared_ptr by value and moves it into place: exactly one
  // refcount increment at the call site instead of copy + discard.
  XNNExecutor(std::shared_ptr<XNNWorkspace> workspace)
      : workspace_(std::move(workspace)) {}

  /// Number of external inputs the runtime expects.
  inline size_t getNumInputs() const {
    return input_ids_.size();
  }

  /// Number of external outputs the runtime produces.
  inline size_t getNumOutputs() const {
    return output_ids_.size();
  }

  /// Names of the packed weight blobs (returned by copy).
  inline std::vector<std::string> get_packed_data_names() const {
    return packed_data_names_;
  }

  /// True when this executor's weights live in a weight cache.
  inline bool uses_weight_cache() const {
    return !packed_data_names_.empty();
  }

  /// Workspace shared with other executors; the returned pointer co-owns it.
  inline std::shared_ptr<XNNWorkspace> get_workspace() const {
    return workspace_;
  }

  /**
   * Initialize the XNNExecutor with a given runtime and input/output ids.
   * The input/output ids are expected to be sorted in order of their
   * flatbuffer id_outs
   */
  ET_NODISCARD executorch::runtime::Error initialize(
      xnn_runtime_t runtime,
      std::vector<uint32_t>&& input_ids,
      std::vector<uint32_t>&& output_ids,
      std::vector<std::string>&& packed_data_names);

  /**
   * Prepares the arguments for runtime graph execution.
   * args is an array of EValues that will be passed into the runtime.
   * input shapes will be propagated through the runtime, and perform
   * any additional memory planning as needed
   */
  ET_NODISCARD executorch::runtime::Error prepare_args(
      executorch::runtime::Span<executorch::runtime::EValue*> args);

  /**
   * Executes the graph using the args prepared at prepare_args().
   */
  ET_NODISCARD executorch::runtime::Error forward(
      executorch::ET_RUNTIME_NAMESPACE::BackendExecutionContext& context);

  /**
   * Resizes output tensors to match XNNPACK's computed shapes.
   */
  ET_NODISCARD executorch::runtime::Error resize_outputs(
      executorch::runtime::Span<executorch::runtime::EValue*> args) const;

  /**
   * Converts output data types after XNNPACK execution.
   *
   * For arg_max pooling, XNNPACK outputs int32 index tensors that need
   * to be converted to int64 for ExecuTorch.
   */
  ET_NODISCARD executorch::runtime::Error convert_outputs(
      executorch::runtime::Span<executorch::runtime::EValue*> args) const;

  friend class XNNCompiler;
};
} // namespace delegate
} // namespace xnnpack
} // namespace backends
} // namespace executorch