PufferLib/src/bindings_cpu.cpp at 47521fb7c042b273d11fd54a1aa3b260c88464f4 · PufferAI/PufferLib · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
// bindings_cpu.cpp - CPU-only Python bindings (no nvcc/CUDA required)

#include <pybind11/pybind11.h>
#include <pybind11/stl.h>

#define _PUFFER_STRINGIFY(x) #x
#define PUFFER_STRINGIFY(x) _PUFFER_STRINGIFY(x)
#include <cstring>

// vecenv.h header section gives us StaticVec, Dict, cudaStream_t typedef
#include "vecenv.h"

namespace py = pybind11;

// Stub out CUDA functions that the static lib references (dead code when gpu=0)
extern "C" {
// Local stand-ins for the CUDA runtime's status and copy-kind types, so the
// stub signatures below can be written without any CUDA header.
typedef int cudaError_t;
typedef int cudaMemcpyKind;
// Every stub below ignores all of its arguments and returns 0. In particular
// the allocation stubs (cudaHostAlloc, cudaMalloc) never write to their output
// pointer and the copy/memset stubs never touch memory, so they only exist to
// satisfy the linker and must not be relied on to do any work.
cudaError_t cudaHostAlloc(void**, size_t, unsigned int) { return 0; }
cudaError_t cudaMalloc(void**, size_t) { return 0; }
cudaError_t cudaMemcpy(void*, const void*, size_t, cudaMemcpyKind) { return 0; }
cudaError_t cudaMemcpyAsync(void*, const void*, size_t, cudaMemcpyKind, cudaStream_t) { return 0; }
cudaError_t cudaMemset(void*, int, size_t) { return 0; }
cudaError_t cudaFree(void*) { return 0; }
cudaError_t cudaFreeHost(void*) { return 0; }
cudaError_t cudaSetDevice(int) { return 0; }
cudaError_t cudaDeviceSynchronize(void) { return 0; }
cudaError_t cudaStreamSynchronize(cudaStream_t) { return 0; }
cudaError_t cudaStreamCreateWithFlags(cudaStream_t*, unsigned int) { return 0; }
cudaError_t cudaStreamQuery(cudaStream_t) { return 0; }
// Returns a fixed placeholder message regardless of the error code passed in.
const char* cudaGetErrorString(cudaError_t) { return "stub"; }
}

// ============================================================================
// CPU advantage (same as puff_advantage_row_scalar but plain C++)
// ============================================================================

// Fills `advantages` row by row from per-step values, rewards, dones and
// importance ratios, walking each row backwards in time.
//
// All five *_ptr arguments are raw addresses of float buffers laid out as
// [num_steps][horizon] (row-major, indexed as row * horizon + t).
// For each row and each t from horizon-2 down to 0:
//   rho   = min(importance[t], rho_clip)
//   c     = min(importance[t], c_clip)
//   delta = rho * rewards[t+1] + gamma * values[t+1] * (1 - dones[t+1]) - values[t]
//   adv   = delta + gamma * lambda * c * adv_next * (1 - dones[t+1])
// The last column (t == horizon-1) of `advantages` is never written.
// NOTE(review): this looks like GAE with V-trace-style clipping — confirm
// against puff_advantage_row_scalar, which this is meant to mirror.
static void py_puff_advantage_cpu(
        long long values_ptr, long long rewards_ptr,
        long long dones_ptr, long long importance_ptr,
        long long advantages_ptr,
        int num_steps, int horizon,
        float gamma, float lambda, float rho_clip, float c_clip) {
    // Nothing to compute: the time loop needs at least two steps per row.
    if (num_steps <= 0 || horizon < 2)
        return;

    const float* values = reinterpret_cast<const float*>(values_ptr);
    const float* rewards = reinterpret_cast<const float*>(rewards_ptr);
    const float* dones = reinterpret_cast<const float*>(dones_ptr);
    const float* importance = reinterpret_cast<const float*>(importance_ptr);
    float* advantages = reinterpret_cast<float*>(advantages_ptr);

    const size_t row_len = static_cast<size_t>(horizon);
    for (int row = 0; row < num_steps; row++) {
        // Offset is computed in size_t: row * horizon as int overflows (UB)
        // once the buffer holds more than INT_MAX elements.
        const size_t off = static_cast<size_t>(row) * row_len;
        float lastpufferlam = 0;
        for (int t = horizon - 2; t >= 0; t--) {
            const size_t cur = off + static_cast<size_t>(t);
            const size_t nxt = cur + 1;
            float nextnonterminal = 1.0f - dones[nxt];
            float imp = importance[cur];
            float rho_t = imp < rho_clip ? imp : rho_clip;
            float c_t = imp < c_clip ? imp : c_clip;
            float r_nxt = rewards[nxt];
            float v = values[cur];
            float v_nxt = values[nxt];
            float delta = rho_t * r_nxt + gamma * v_nxt * nextnonterminal - v;
            lastpufferlam = delta + gamma * lambda * c_t * lastpufferlam * nextnonterminal;
            advantages[cur] = lastpufferlam;
        }
    }
}

// ============================================================================
// Dict helpers (same as bindings.cu)
// ============================================================================

// Looks up a required entry in a Python config dict and converts it to double.
// Throws std::runtime_error naming the key when it is absent; a value that
// cannot be converted to double propagates pybind11's cast failure.
static double get_config(py::dict& kwargs, const char* key) {
    const bool present = kwargs.contains(key);
    if (present) {
        return kwargs[key].cast<double>();
    }
    throw std::runtime_error(std::string("Missing config key: ") + key);
}

// Builds a C Dict from a Python dict, keeping only entries that have a str key
// and a value convertible to double. Everything else is skipped (best-effort),
// so non-numeric config values simply do not appear in the result.
// The Dict is created with create_dict(py_dict.size()) and returned to the
// caller; this function does not free it.
static Dict* py_dict_to_c_dict(py::dict py_dict) {
    Dict* c_dict = create_dict(py_dict.size());
    for (auto item : py_dict) {
        // PyUnicode_AsUTF8 only accepts str objects; for anything else it
        // returns NULL with a Python error set, which must not reach dict_set.
        if (!PyUnicode_Check(item.first.ptr()))
            continue;
        const char* key = PyUnicode_AsUTF8(item.first.ptr());
        if (key == nullptr) {
            // A str that cannot be encoded as UTF-8: drop the pending Python
            // error so it does not surface in an unrelated later call.
            PyErr_Clear();
            continue;
        }
        // NOTE(review): `key` points into the str object's cached UTF-8
        // buffer — confirm dict_set copies it or that py_dict outlives c_dict.
        try { dict_set(c_dict, key, item.second.cast<double>()); }
        catch (const py::cast_error&) {}  // value is not numeric: skip it
    }
    return c_dict;
}

// ============================================================================
// VecEnv wrapper
// ============================================================================

// Python-facing handle around a StaticVec plus metadata cached by create_vec().
// All members carry defaults so a default-constructed VecEnv is well-defined
// (null vec, zero sizes) instead of holding indeterminate values.
struct VecEnv {
    StaticVec* vec = nullptr;       // underlying vectorized env; nulled by vec_close()
    int total_agents = 0;           // taken from the "total_agents" vec config entry
    int obs_size = 0;               // value of get_obs_size() at creation
    int num_atns = 0;               // value of get_num_atns() at creation
    std::vector<int> act_sizes;     // copy of get_act_sizes()[0 .. get_num_act_sizes())
    std::string obs_dtype;          // copy of get_obs_dtype()
    size_t obs_elem_size = 0;       // value of get_obs_elem_size() at creation
};

// Builds a VecEnv from a Python config of the form {"vec": {...}, "env": {...}}.
// "vec" must contain numeric "total_agents" and "num_buffers" entries (see
// get_config); both sub-dicts are converted to C Dicts and handed to
// create_static_vec(). The `gpu` argument is accepted for signature
// compatibility and ignored: the static vec is always created with 0.
static std::unique_ptr<VecEnv> create_vec(py::dict args, int gpu = 0) {
    (void)gpu;

    py::dict vec_cfg = args["vec"].cast<py::dict>();
    py::dict env_cfg = args["env"].cast<py::dict>();

    // Required settings are validated before any C-side allocation happens.
    const int agent_count = (int)get_config(vec_cfg, "total_agents");
    const int buffer_count = (int)get_config(vec_cfg, "num_buffers");

    Dict* vec_cfg_c = py_dict_to_c_dict(vec_cfg);
    Dict* env_cfg_c = py_dict_to_c_dict(env_cfg);

    std::unique_ptr<VecEnv> wrapper = std::make_unique<VecEnv>();
    {
        // Environment construction runs without the GIL held.
        py::gil_scoped_release unlocked;
        wrapper->vec = create_static_vec(agent_count, buffer_count, 0, vec_cfg_c, env_cfg_c);
    }

    // Cache the environment metadata exposed to Python as read-only fields.
    wrapper->total_agents = agent_count;
    wrapper->obs_size = get_obs_size();
    wrapper->num_atns = get_num_atns();

    int* sizes_begin = get_act_sizes();
    const int sizes_count = get_num_act_sizes();
    wrapper->act_sizes.assign(sizes_begin, sizes_begin + sizes_count);

    wrapper->obs_dtype = std::string(get_obs_dtype());
    wrapper->obs_elem_size = get_obs_elem_size();
    return wrapper;
}

// Resets every environment in the vec via static_vec_reset(), with the GIL
// released for the duration of the call.
// Throws std::runtime_error if the wrapper has no live StaticVec (vec_close()
// sets ve.vec to nullptr), instead of passing a null pointer to C code.
static void vec_reset(VecEnv& ve) {
    if (ve.vec == nullptr)
        throw std::runtime_error("reset: VecEnv is closed or uninitialized");
    py::gil_scoped_release no_gil;
    static_vec_reset(ve.vec);
}

// Copies one batch of actions into the vec's action buffer and steps it.
// `actions_ptr` is the raw address of a caller-owned buffer holding at least
// total_agents * num_atns elements of sizeof(float) bytes each; that many bytes
// are copied into ve.vec->actions before cpu_vec_step() runs without the GIL.
// Throws std::runtime_error if the wrapper has no live StaticVec (after
// close()) or if `actions_ptr` is 0, rather than dereferencing a null pointer.
static void cpu_vec_step_py(VecEnv& ve, long long actions_ptr) {
    if (ve.vec == nullptr)
        throw std::runtime_error("cpu_step: VecEnv is closed or uninitialized");
    if (actions_ptr == 0)
        throw std::runtime_error("cpu_step: actions_ptr is null");

    const size_t nbytes = (size_t)ve.total_agents * ve.num_atns * sizeof(float);
    memcpy(ve.vec->actions, reinterpret_cast<const void*>(actions_ptr), nbytes);
    {
        py::gil_scoped_release no_gil;
        cpu_vec_step(ve.vec);
    }
}

// Collects the vec's log entries into a fresh Python dict {key: value}.
// A temporary C Dict (capacity argument 32) is filled by static_vec_log() and
// copied entry by entry; it is released on every exit path, including when
// building the Python dict throws.
// Throws std::runtime_error if the wrapper has no live StaticVec (after close()).
static py::dict vec_log(VecEnv& ve) {
    if (ve.vec == nullptr)
        throw std::runtime_error("log: VecEnv is closed or uninitialized");

    // Mirrors the manual teardown the Dict needs: items array first, then the Dict.
    auto release = [](Dict* d) {
        free(d->items);
        free(d);
    };
    std::unique_ptr<Dict, decltype(release)> out(create_dict(32), release);

    static_vec_log(ve.vec, out.get());
    py::dict result;
    for (int i = 0; i < out->size; i++)
        result[out->items[i].key] = out->items[i].value;
    return result;
}

// Maps a dtype name ("int8" ... "uint64", "float32", "float64") to the width
// of one element in bytes. Throws std::runtime_error for any other name.
static size_t state_dtype_size(const char* dtype) {
    struct DtypeWidth {
        const char* name;
        size_t bytes;
    };
    static const DtypeWidth kWidths[] = {
        {"int8", 1},  {"uint8", 1},
        {"int16", 2}, {"uint16", 2},
        {"int32", 4}, {"uint32", 4}, {"float32", 4},
        {"int64", 8}, {"uint64", 8}, {"float64", 8},
    };
    for (const DtypeWidth& entry : kWidths) {
        if (strcmp(dtype, entry.name) == 0)
            return entry.bytes;
    }
    throw std::runtime_error(std::string("my_state: unknown dtype ") + dtype);
}

// Snapshot of env-exported state (my_state hook) for one environment.
// Returns {field_name: {"data": ..., "dtype": str, "shape": tuple}}. "data" is
// a Python-owned bytes copy unless the env flags the field
// PUFF_STATE_ZERO_COPY, in which case it is a read-only memoryview over the C
// buffer (invalidated by close()). The byte length is the product of the
// field's dims times the element width of its dtype.
// The result has one entry per field reported by my_state(), so it is empty
// when the hook reports none.
// Throws std::runtime_error if the wrapper has no live StaticVec, if env_id is
// out of range, or if a field reports a dtype state_dtype_size() rejects.
static py::dict vec_state(VecEnv& ve, int env_id) {
    // Must be checked before ve.vec->size is read below.
    if (ve.vec == nullptr)
        throw std::runtime_error("state: VecEnv is closed or uninitialized");
    if (env_id < 0 || env_id >= ve.vec->size)
        throw std::runtime_error("state: env_id out of range");
    StateField fields[PUFF_MAX_STATE_FIELDS];
    int n = my_state(static_vec_env_at(ve.vec, env_id), fields, PUFF_MAX_STATE_FIELDS);
    py::dict result;
    for (int i = 0; i < n; i++) {
        size_t count = 1;
        py::tuple shape(fields[i].ndim);
        for (int d = 0; d < fields[i].ndim; d++) {
            shape[d] = fields[i].dims[d];
            count *= (size_t)fields[i].dims[d];
        }
        size_t nbytes = count * state_dtype_size(fields[i].dtype);
        py::dict entry;
        if (fields[i].flags & PUFF_STATE_ZERO_COPY) {
            // Force the const void* overload: pybind11's void* overload
            // defaults to a writable view, which would let Python mutate env
            // memory if StateField::data is declared non-const (its declared
            // type is not visible in this file).
            entry["data"] = py::memoryview::from_memory(
                static_cast<const void*>(fields[i].data), (py::ssize_t)nbytes);
        } else {
            entry["data"] = py::bytes((const char*)fields[i].data, nbytes);
        }
        entry["dtype"] = fields[i].dtype;
        entry["shape"] = shape;
        result[fields[i].name] = entry;
    }
    return result;
}

// Releases the underlying StaticVec via static_vec_close() and marks the
// wrapper as closed by nulling ve.vec. Calling it again is a no-op, so a
// repeated close() from Python never hands a null pointer to C code.
static void vec_close(VecEnv& ve) {
    if (ve.vec == nullptr)
        return;
    static_vec_close(ve.vec);
    ve.vec = nullptr;
}

// ============================================================================
// Module
// ============================================================================

PYBIND11_MODULE(_C, m) {
    m.attr("precision_bytes") = 4;
    m.attr("env_name") = PUFFER_STRINGIFY(ENV_NAME);
    m.attr("gpu") = 0;

    m.def("puff_advantage_cpu", &py_puff_advantage_cpu);
    m.def("create_vec", &create_vec, py::arg("args"), py::arg("gpu") = 0);

    py::class_<VecEnv, std::unique_ptr<VecEnv>>(m, "VecEnv")
        .def_readonly("total_agents", &VecEnv::total_agents)
        .def_readonly("obs_size", &VecEnv::obs_size)
        .def_readonly("num_atns", &VecEnv::num_atns)
        .def_readonly("act_sizes", &VecEnv::act_sizes)
        .def_readonly("obs_dtype", &VecEnv::obs_dtype)
        .def_readonly("obs_elem_size", &VecEnv::obs_elem_size)
        .def_property_readonly("gpu", [](VecEnv&) { return 0; })
        .def_property_readonly("obs_ptr", [](VecEnv& ve) { return (long long)ve.vec->observations; })
        .def_property_readonly("rewards_ptr", [](VecEnv& ve) { return (long long)ve.vec->rewards; })
        .def_property_readonly("terminals_ptr", [](VecEnv& ve) { return (long long)ve.vec->terminals; })
        .def("reset", &vec_reset)
        .def("cpu_step", &cpu_vec_step_py)
        .def("render", [](VecEnv& ve, int env_id) { static_vec_render(ve.vec, env_id); })
        .def("state", &vec_state, py::arg("env_id") = 0)
        .def("log", &vec_log)
        .def("close", &vec_close);
}