diff --git a/CMakeLists.txt b/CMakeLists.txt index 2be0f7da5..4b4600fb2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,6 +9,9 @@ list(APPEND CMAKE_MODULE_PATH "${USD_LOCATION}/lib/cmake/" 0) # Define the project and build options include(options) +# OpenGL must be found before importing pxrTargets (garch links OpenGL::GL) +find_package(OpenGL) + # Global required packagse find_package(USD REQUIRED) find_package(Arnold REQUIRED) diff --git a/SConstruct b/SConstruct index 53bdd744c..2e2aa8123 100644 --- a/SConstruct +++ b/SConstruct @@ -239,6 +239,17 @@ ARNOLD_API_LIB = os.path.abspath(env.subst(env['ARNOLD_API_LIB'])) ARNOLD_BINARIES = os.path.abspath(env.subst(env['ARNOLD_BINARIES'])) +## To be removed : if AiQueryAOV is defined in ai_render.h, then we enable fast viewport in this build +ai_render_h = os.path.join(ARNOLD_API_INCLUDES, 'ai_render.h') +try: + with open(ai_render_h) as f: + if 'AiQueryAOV' in f.read(): + env.Append(CPPDEFINES=['FAST_VIEWPORT_SUPPORT']) + print('AiQueryAOV found: enabling FAST_VIEWPORT_SUPPORT') +except IOError: + pass + + if not is_windows and env['RPATH_ADD_ARNOLD_BINARIES']: env['RPATH'] = ARNOLD_BINARIES diff --git a/cmake/modules/FindUSD.cmake b/cmake/modules/FindUSD.cmake index 1ebd0e51e..2753d0814 100644 --- a/cmake/modules/FindUSD.cmake +++ b/cmake/modules/FindUSD.cmake @@ -226,7 +226,7 @@ if (HOUDINI_LOCATION) message(STATUS "USD version: ${USD_VERSION}") # List of usd libraries we need for this project - set(ARNOLD_USD_LIBS_ arch;tf;gf;vt;sdr;sdf;usd;plug;trace;work;hf;hd;usdImaging;usdLux;pxOsd;cameraUtil;ar;usdGeom;usdShade;pcp;usdUtils;usdVol;usdSkel;usdRender;js) + set(ARNOLD_USD_LIBS_ arch;tf;gf;vt;sdr;sdf;usd;plug;trace;work;hf;hd;usdImaging;usdLux;pxOsd;cameraUtil;ar;usdGeom;usdShade;pcp;usdUtils;usdVol;usdSkel;usdRender;js;hgi;hgiGL) if (${USD_VERSION} VERSION_LESS "0.25.05") list(APPEND ARNOLD_USD_LIBS_ ndr) endif() @@ -412,7 +412,7 @@ endif () # Look for the dynamic libraries. 
# Right now this is using a hardcoded list of libraries, but in the future we should parse the installed cmake files # and figure out the list of the names for libraries. -set(USD_LIBS ar;arch;cameraUtil;garch;gf;glf;hd;hdMtlx;hdSt;hdx;hf;hgi;hgiGL;hgInterop;hio;js;kind;pcp;plug;pxOsd;sdf;sdr;tf;trace;usd;usdAppUtils;usdGeom;usdHydra;usdImaging;usdImagingGL;usdLux;usdMedia;usdRender;usdRi;usdRiImaging;usdShade;usdSkel;usdUI;usdUtils;usdviewq;usdVol;usdVolImaging;vt;work;usd_ms) +set(USD_LIBS ar;arch;cameraUtil;garch;gf;glf;hd;hdMtlx;hdSt;hdx;hf;hgi;hgiGL;hgInterop;hio;js;kind;pcp;plug;pxOsd;sdf;sdr;tf;trace;usd;usdAppUtils;usdGeom;usdHydra;usdImaging;usdImagingGL;usdLux;usdMedia;usdRender;usdRi;usdRiImaging;usdShade;usdSkel;usdUI;usdUtils;usdviewq;usdVol;usdVolImaging;vt;work;hgi;hgiGL;js;usd_ms) if (${USD_VERSION} VERSION_LESS "0.25.05") list(APPEND USD_LIBS ndr) endif() diff --git a/libs/common/constant_strings.h b/libs/common/constant_strings.h index 2404d6ed3..305f90e12 100644 --- a/libs/common/constant_strings.h +++ b/libs/common/constant_strings.h @@ -141,6 +141,8 @@ ASTR(debug); ASTR(desc); ASTR(deprecated); ASTR(driver_exr); +ASTR(fast_viewport); +ASTR(final_output); ASTR(FLOAT); ASTR(GI_diffuse_depth); ASTR(GI_diffuse_samples); @@ -561,6 +563,7 @@ ASTR(velocities); ASTR(vertices); ASTR(vidxs); ASTR(viewMtx); +ASTR(viewport_rendering); ASTR(visibility); ASTR(vlist); ASTR(volume); diff --git a/libs/render_delegate/config.cpp b/libs/render_delegate/config.cpp index decc03892..f613bd352 100755 --- a/libs/render_delegate/config.cpp +++ b/libs/render_delegate/config.cpp @@ -116,6 +116,10 @@ TF_DEFINE_ENV_SETTING(HDARNOLD_asset_searchpath, "", "Asset search path."); TF_DEFINE_ENV_SETTING(HDARNOLD_auto_generate_tx, true, "Auto-generate Textures to TX"); +#ifdef FAST_VIEWPORT_SUPPORT +TF_DEFINE_ENV_SETTING(HDARNOLD_fast_viewport, true, "Enable fast viewport"); +#endif + HdArnoldConfig::HdArnoldConfig() { bucket_size = std::max(1, 
TfGetEnvSetting(HDARNOLD_bucket_size)); @@ -158,6 +162,10 @@ HdArnoldConfig::HdArnoldConfig() #endif osl_includepath = TfGetEnvSetting(HDARNOLD_osl_includepath); auto_generate_tx = TfGetEnvSetting(HDARNOLD_auto_generate_tx); + +#ifdef FAST_VIEWPORT_SUPPORT + fast_viewport = TfGetEnvSetting(HDARNOLD_fast_viewport); +#endif } const HdArnoldConfig& HdArnoldConfig::GetInstance() { return TfSingleton::GetInstance(); } diff --git a/libs/render_delegate/config.h b/libs/render_delegate/config.h index 7345eb384..43b1f37b0 100755 --- a/libs/render_delegate/config.h +++ b/libs/render_delegate/config.h @@ -197,6 +197,9 @@ struct HdArnoldConfig { /// bool auto_generate_tx; + /// Use HDARNOLD_fast_viewport to set the value. + /// Defaults to false: the constructor only assigns it when FAST_VIEWPORT_SUPPORT is defined. + bool fast_viewport = false; private: /// Constructor for reading the values from the environment variables. diff --git a/libs/render_delegate/render_buffer.cpp b/libs/render_delegate/render_buffer.cpp index 83144b856..45a6e055f 100644 --- a/libs/render_delegate/render_buffer.cpp +++ b/libs/render_delegate/render_buffer.cpp @@ -16,6 +16,9 @@ // See the License for the specific language governing permissions and // limitations under the License. #include "render_buffer.h" +#include "render_delegate.h" +#include +#include #include #include @@ -24,6 +27,12 @@ // memcpy #include +#include + +#include +#ifdef FAST_VIEWPORT_SUPPORT +#include +#endif // TOOD(pal): use a more efficient locking mechanism than the std::mutex. PXR_NAMESPACE_OPEN_SCOPE @@ -197,9 +206,230 @@ WriteBucketFunctionMap writeBucketFunctions{ } // namespace -HdArnoldRenderBuffer::HdArnoldRenderBuffer(const SdfPath& id) : HdRenderBuffer(id) +HdArnoldRenderBuffer::HdArnoldRenderBuffer(HdArnoldRenderDelegate* renderDelegate, + const SdfPath& id) : HdRenderBuffer(id), _renderDelegate(renderDelegate) +{ +} + +namespace { + +// Attempts to (re)create the HgiGL texture. Returns the GL texture id, which may be 0 if +// no GL context was current at call time. Caller holds the buffer's mutex. 
+// +// The texture format hard-coded to HgiFormatFloat32Vec4 (GL_RGBA32F) regardless of the +// Hd-side format, because that is what AiQueryAOV expects to write into. The Hd-side +// _format is unchanged so CPU consumers (and Hydra's format introspection) keep seeing +// what was requested; the actual GL texture sampled by the compositor reads its format +// from the Hgi descriptor, which is always RGBA32F. +uint32_t _CreateGpuTexture( + Hgi* hgi, HgiTextureHandle& outTexture, unsigned int width, unsigned int height, HdFormat /*format*/, + const TfToken& aovName, const char* suffix) +{ + if (hgi == nullptr) return 0; + if (width == 0 || height == 0) return 0; + HgiTextureDesc desc; + std::string debugName = aovName.IsEmpty() ? "HdArnoldRenderBuffer" : aovName.GetString(); + if (suffix != nullptr && suffix[0] != '\0') { + debugName += '.'; + debugName += suffix; + } + desc.debugName = debugName; + desc.type = HgiTextureType2D; + desc.dimensions = GfVec3i(static_cast(width), static_cast(height), 1); + desc.format = HgiFormatFloat32Vec4; + desc.layerCount = 1; + desc.mipLevels = 1; + desc.sampleCount = HgiSampleCount1; + desc.usage = HgiTextureUsageBitsShaderRead | HgiTextureUsageBitsColorTarget; + outTexture = hgi->CreateTexture(desc); + if (auto* gl = dynamic_cast(outTexture.Get())) { + return gl->GetTextureId(); + } + return 0; +} + +uint32_t _GetGlTextureId(const HgiTextureHandle& texture) +{ + if (auto* gl = dynamic_cast(texture.Get())) { + return gl->GetTextureId(); + } + return 0; +} + +#ifdef FAST_VIEWPORT_SUPPORT + +// Fullscreen blit with vertex-shader Y flip (Arnold top-origin -> OpenGL bottom-origin). 
+struct _GpuFlipBlit { + GLuint program = 0; + GLuint vertexBuffer = 0; + // Latched after a failed shader build so Flip() neither retries nor warns on every frame. + bool initFailed = false; + + // Compiles one shader stage into *outShader. Returns false (after warning) when compilation fails. + bool _CompileShader(const char* src, GLenum stage, GLuint* outShader) const + { + *outShader = glCreateShader(stage); + glShaderSource(*outShader, 1, &src, nullptr); + glCompileShader(*outShader); + GLint status = 0; + glGetShaderiv(*outShader, GL_COMPILE_STATUS, &status); + if (status != GL_TRUE) { + TF_WARN("HdArnoldRenderBuffer: GPU Y-flip shader compile failed"); + return false; + } + return true; + } + + // Lazily builds the blit program and its fullscreen-triangle vertex buffer. + // If compiling or linking fails, `program` is left at 0 so Flip() returns false. + void _EnsureInitialized() + { + if (program != 0 || initFailed) { + return; + } + + static const char* const kVertexShader120 = R"(#version 120 +attribute vec4 position; +attribute vec2 uvIn; +varying vec2 uv; +void main(void) { + gl_Position = position; + uv = vec2(uvIn.x, 1.0 - uvIn.y); } +)"; + + static const char* const kFragmentShader120 = R"(#version 120 +varying vec2 uv; +uniform sampler2D colorIn; +void main(void) +{ + gl_FragColor = texture2D(colorIn, uv); +} +)"; + + GLuint vs = 0; + GLuint fs = 0; + const bool vsOk = _CompileShader(kVertexShader120, GL_VERTEX_SHADER, &vs); + const bool fsOk = _CompileShader(kFragmentShader120, GL_FRAGMENT_SHADER, &fs); + + GLint linkStatus = GL_FALSE; + if (vsOk && fsOk) { + program = glCreateProgram(); + glAttachShader(program, vs); + glAttachShader(program, fs); + glLinkProgram(program); + glGetProgramiv(program, GL_LINK_STATUS, &linkStatus); + if (linkStatus != GL_TRUE) { + TF_WARN("HdArnoldRenderBuffer: GPU Y-flip shader link failed"); + } + } + glDeleteShader(vs); + glDeleteShader(fs); + if (linkStatus != GL_TRUE) { + // Never keep a program that did not link: drawing with it is a GL error. + if (program != 0) { + glDeleteProgram(program); + program = 0; + } + initFailed = true; + return; + } + + static const float vertices[] = { + // position (xyzw) uv + -1.0f, 3.0f, 0.0f, 1.0f, 0.0f, 2.0f, + -1.0f, -1.0f, 0.0f, 1.0f, 0.0f, 0.0f, + 3.0f, -1.0f, 0.0f, 1.0f, 2.0f, 0.0f, + }; + glGenBuffers(1, &vertexBuffer); + glBindBuffer(GL_ARRAY_BUFFER, vertexBuffer); + glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW); + glBindBuffer(GL_ARRAY_BUFFER, 0); + } + + bool Flip(uint32_t srcTextureId, uint32_t dstTextureId, int width, int height) + { + if (srcTextureId == 0 || dstTextureId == 0 || width <= 0 || height <= 0) { + return false; + } + + 
_EnsureInitialized(); + if (program == 0) { + return false; + } + + GLint restoreFramebuffer = 0; + glGetIntegerv(GL_FRAMEBUFFER_BINDING, &restoreFramebuffer); + + GLint restoreViewport[4]; + glGetIntegerv(GL_VIEWPORT, restoreViewport); + + GLint restoreProgram = 0; + glGetIntegerv(GL_CURRENT_PROGRAM, &restoreProgram); + + GLint restoreActiveTexture = 0; + glGetIntegerv(GL_ACTIVE_TEXTURE, &restoreActiveTexture); + + GLuint fbo = 0; + glGenFramebuffers(1, &fbo); + glBindFramebuffer(GL_FRAMEBUFFER, fbo); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dstTextureId, 0); + + const GLenum fboStatus = glCheckFramebufferStatus(GL_FRAMEBUFFER); + if (fboStatus != GL_FRAMEBUFFER_COMPLETE) { + TF_WARN("HdArnoldRenderBuffer: GPU Y-flip FBO incomplete (status %u)", static_cast(fboStatus)); + glBindFramebuffer(GL_FRAMEBUFFER, static_cast(restoreFramebuffer)); + glDeleteFramebuffers(1, &fbo); + return false; + } + + glViewport(0, 0, width, height); + glClearColor(0.0f, 0.0f, 0.0f, 0.0f); + glClear(GL_COLOR_BUFFER_BIT); + glUseProgram(program); + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, srcTextureId); + const GLint colorLoc = glGetUniformLocation(program, "colorIn"); + glUniform1i(colorLoc, 0); + + GLint restoreArrayBuffer = 0; + glGetIntegerv(GL_ARRAY_BUFFER_BINDING, &restoreArrayBuffer); + + glBindBuffer(GL_ARRAY_BUFFER, vertexBuffer); + const GLint locPosition = glGetAttribLocation(program, "position"); + glVertexAttribPointer(locPosition, 4, GL_FLOAT, GL_FALSE, sizeof(float) * 6, nullptr); + glEnableVertexAttribArray(locPosition); + + const GLint locUv = glGetAttribLocation(program, "uvIn"); + glVertexAttribPointer( + locUv, 2, GL_FLOAT, GL_FALSE, sizeof(float) * 6, reinterpret_cast(sizeof(float) * 4)); + glEnableVertexAttribArray(locUv); + + GLboolean restoreDepthEnabled = glIsEnabled(GL_DEPTH_TEST); + GLboolean restoreDepthMask = GL_TRUE; + glGetBooleanv(GL_DEPTH_WRITEMASK, &restoreDepthMask); + glDisable(GL_DEPTH_TEST); + 
glDepthMask(GL_FALSE); + + GLboolean restoreBlend = glIsEnabled(GL_BLEND); + glDisable(GL_BLEND); + + glDrawArrays(GL_TRIANGLES, 0, 3); + + if (restoreBlend) { + glEnable(GL_BLEND); + } + if (restoreDepthEnabled) { + glEnable(GL_DEPTH_TEST); + } + glDepthMask(restoreDepthMask); + + glDisableVertexAttribArray(locPosition); + glDisableVertexAttribArray(locUv); + glBindBuffer(GL_ARRAY_BUFFER, static_cast(restoreArrayBuffer)); + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, 0); + + glUseProgram(static_cast(restoreProgram)); + glBindFramebuffer(GL_FRAMEBUFFER, static_cast(restoreFramebuffer)); + glViewport(restoreViewport[0], restoreViewport[1], restoreViewport[2], restoreViewport[3]); + glActiveTexture(restoreActiveTexture); + + glDeleteFramebuffers(1, &fbo); + return true; + } +}; + +_GpuFlipBlit _gpuFlipBlit; + +#endif // FAST_VIEWPORT_SUPPORT + +} // namespace bool HdArnoldRenderBuffer::Allocate(const GfVec3i& dimensions, HdFormat format, bool multiSampled) { @@ -207,15 +437,35 @@ bool HdArnoldRenderBuffer::Allocate(const GfVec3i& dimensions, HdFormat format, // So deallocate won't lock. decltype(_buffer) tmp{}; _buffer.swap(tmp); + if (_hgi != nullptr) { + if (_aovTexture) { + _hgi->DestroyTexture(&_aovTexture); + } + if (_texture) { + _hgi->DestroyTexture(&_texture); + } + } if (!_SupportedComponentFormat(format)) { _width = 0; _height = 0; return false; } + _gpuInit = false; TF_UNUSED(multiSampled); _format = format; _width = dimensions[0]; _height = dimensions[1]; + + if (_hgi != nullptr) { + // --GPU buffers-- + // Best-effort: try to create the GPU texture now. If GL context isn't current the + // resulting GL id will be 0 — the render pass will call EnsureGpuTexture() later + // from a GL-active context to retry. 
+ _CreateGpuTexture(_hgi, _aovTexture, _width, _height, _format, _aovName, "aov"); + _CreateGpuTexture(_hgi, _texture, _width, _height, _format, _aovName, "display"); + } + + // --CPU buffers-- const auto byteCount = _width * _height * HdDataSizeOfFormat(format); if (byteCount != 0) { _buffer.resize(byteCount, 0); @@ -223,6 +473,87 @@ bool HdArnoldRenderBuffer::Allocate(const GfVec3i& dimensions, HdFormat format, return true; } +void HdArnoldRenderBuffer::EnsureGpuTexture() +{ + if (_hgi == nullptr || _width == 0 || _height == 0 || _gpuInit) { + return; + } + std::lock_guard _guard(_mutex); + + if (_aovTexture && _texture && _GetGlTextureId(_aovTexture) != 0 && _GetGlTextureId(_texture) != 0) { + _gpuInit = true; + return; + } + + const auto ensureTexture = [&](HgiTextureHandle& tex, const char* suffix) { + if (tex && _GetGlTextureId(tex) != 0) { + return; + } + if (tex) { + _hgi->DestroyTexture(&tex); + } + _CreateGpuTexture(_hgi, tex, _width, _height, _format, _aovName, suffix); + }; + + ensureTexture(_aovTexture, "aov"); + ensureTexture(_texture, "display"); + if (_GetGlTextureId(_aovTexture) != 0 && _GetGlTextureId(_texture) != 0) { + _gpuInit = true; + } +} + +#ifdef FAST_VIEWPORT_SUPPORT +bool HdArnoldRenderBuffer::_FlipAovToDisplayTexture() const +{ + const uint32_t srcId = _GetGlTextureId(_aovTexture); + const uint32_t dstId = _GetGlTextureId(_texture); + return _gpuFlipBlit.Flip(srcId, dstId, static_cast(_width), static_cast(_height)); +} +#endif +VtValue HdArnoldRenderBuffer::GetResource(bool /*multiSampled*/) const +{ +#ifdef FAST_VIEWPORT_SUPPORT + + // GetResource() is called by Hydra/Solaris from the main thread with the GL context + // current. AiQueryAOV does CUDA<->GL interop that requires a current GL context, so + // this is the right place to pull Arnold's latest AOV data into the GL texture. 
+ if (_renderDelegate != nullptr && _renderDelegate->IsFastViewport()) { + auto* self = const_cast(this); + self->EnsureGpuTexture(); + std::lock_guard guard(self->_mutex); + if (_aovTexture && _texture) { + const uint64_t aovGlId = static_cast(_GetGlTextureId(_aovTexture)); + if (aovGlId != 0) { + const AtRenderErrorCode rc = AiQueryAOV( + _renderDelegate->GetRenderSession(), AtString(_aovName.GetText()), aovGlId); + if (rc == AI_SUCCESS) { + // Sync CUDA/GL interop before sampling the AOV texture in our blit. + glFinish(); + if (self->_FlipAovToDisplayTexture() && _GetGlTextureId(_texture) != 0) { + return VtValue(_texture); + } + + } else { + TF_WARN( + "AiQueryAOV failed for AOV \"%s\" (code %d)", _aovName.GetText(), static_cast(rc)); + } + } + // Flip failed or display texture unavailable — show AOV as-is (may be Y-inverted). + return VtValue(_aovTexture); + } + } + +#endif + return VtValue(); +} + +// Switches the Hgi used for the GPU path; passing nullptr returns the buffer to the CPU path. +// Textures created through the outgoing Hgi are released first: Allocate() and _Deallocate() +// only destroy textures while _hgi is non-null, so they would otherwise leak once _hgi is cleared. +// NOTE(review): assumes the outgoing Hgi is still alive and that a GL context is current — confirm with callers. +void HdArnoldRenderBuffer::SetHgi(Hgi* hgi) { + if (_hgi == hgi) { + return; + } + std::lock_guard _guard(_mutex); + if (_hgi != nullptr) { + if (_aovTexture) { + _hgi->DestroyTexture(&_aovTexture); + } + if (_texture) { + _hgi->DestroyTexture(&_texture); + } + } + // Force EnsureGpuTexture() to recreate the textures against the new Hgi. + _gpuInit = false; + _hgi = hgi; +} + void* HdArnoldRenderBuffer::Map() { _mutex.lock(); @@ -255,12 +586,25 @@ void HdArnoldRenderBuffer::_Deallocate() std::lock_guard _guard(_mutex); decltype(_buffer) tmp{}; _buffer.swap(tmp); + if (_hgi != nullptr) { + if (_aovTexture) { + _hgi->DestroyTexture(&_aovTexture); + } + if (_texture) { + _hgi->DestroyTexture(&_texture); + } + } + _gpuInit = false; } void HdArnoldRenderBuffer::WriteBucket( unsigned int bucketXO, unsigned int bucketYO, unsigned int bucketWidth, unsigned int bucketHeight, HdFormat format, const void* bucketData) { + // When backed by a GPU texture, bucket data is delivered via AiQueryAOV, not the driver path. 
+ if (_hgi != nullptr) + return; + if (!_SupportedComponentFormat(format)) { return; } diff --git a/libs/render_delegate/render_buffer.h b/libs/render_delegate/render_buffer.h index c32287270..07c4997aa 100644 --- a/libs/render_delegate/render_buffer.h +++ b/libs/render_delegate/render_buffer.h @@ -28,6 +28,8 @@ #include #include +#include +#include #include #include @@ -37,11 +39,13 @@ struct AtNode; PXR_NAMESPACE_OPEN_SCOPE +class HdArnoldRenderDelegate; + /// Utility class for handling render data. class HdArnoldRenderBuffer : public HdRenderBuffer { public: HDARNOLD_API - HdArnoldRenderBuffer(const SdfPath& id); + HdArnoldRenderBuffer(HdArnoldRenderDelegate* renderDelegate, const SdfPath& id); HDARNOLD_API ~HdArnoldRenderBuffer() override = default; @@ -81,6 +85,12 @@ class HdArnoldRenderBuffer : public HdRenderBuffer { HDARNOLD_API bool IsMapped() const override; + /// Returns the GPU texture backing this render buffer, wrapped in a VtValue. + /// Returns an empty VtValue when no Hgi was provided (CPU path). + HDARNOLD_API + VtValue GetResource(bool multiSampled) const override; + + /// Resolve the buffer so that reads reflect the latest writes. /// This buffer does not need any resolving. void Resolve() override {} @@ -96,7 +106,8 @@ class HdArnoldRenderBuffer : public HdRenderBuffer { /// @param converged True if the render buffer is converged, false otherwise. void SetConverged(bool converged) { _converged = converged; } - bool IsEmpty() const {return _buffer.empty();} + bool IsEmpty() const { return _buffer.empty() && !_texture && !_aovTexture; } + HDARNOLD_API void WriteBucket( unsigned int bucketXO, unsigned int bucketYo, unsigned int bucketWidth, unsigned int bucketHeight, @@ -115,17 +126,48 @@ class HdArnoldRenderBuffer : public HdRenderBuffer { }; + /// Provide the host Hgi instance. Must be called before Allocate() to take the GPU path. + /// Passing nullptr keeps the CPU path active. 
+ HDARNOLD_API + void SetHgi(Hgi* hgi); + + /// Ensures the GPU texture has been created with a valid GL id. Call this from a context + /// where the GL context is current (e.g. the render pass's _Execute). If the texture is + /// missing or its GL id is 0 (because the previous create-attempt ran without a GL + /// context), it is destroyed and recreated. + HDARNOLD_API + void EnsureGpuTexture(); + + /// Returns true if this buffer is backed by a GPU texture. + bool HasGpuTexture() const { return static_cast(_texture); } + + /// Provide the Arnold AOV name used when calling AiQueryAOV on this buffer. + HDARNOLD_API + void SetAovName(const TfToken& aovName) { _aovName = aovName; } + private: /// Deallocates the data stored in the buffer. HDARNOLD_API void _Deallocate() override; - std::vector _buffer; ///< Storing render data. +#ifdef FAST_VIEWPORT_SUPPORT + /// Blit from _aovTexture into _texture with a Y flip (Arnold top-origin -> OpenGL). + /// @return True if _texture was updated, false to use _aovTexture as-is. + bool _FlipAovToDisplayTexture() const; +#endif + + std::vector _buffer; ///< Storing render data (CPU path only). + Hgi* _hgi = nullptr; ///< Borrowed Hgi instance, owned by the render delegate's host. + mutable HgiTextureHandle _aovTexture; ///< AiQueryAOV target (Arnold image-space Y). + mutable HgiTextureHandle _texture; ///< Hydra-facing texture after Y flip (GPU path only). + std::atomic _gpuInit = false; std::mutex _mutex; ///< Mutex for the parallel writes. unsigned int _width = 0; ///< Buffer width. unsigned int _height = 0; ///< Buffer height. HdFormat _format = HdFormat::HdFormatUNorm8Vec4; ///< Internal format of the buffer. + HdArnoldRenderDelegate* _renderDelegate = nullptr; ///< Borrowed delegate pointer for accessing the render session. bool _converged = false; ///< Store if the render buffer has converged. + TfToken _aovName; ///< AOV name passed to AiQueryAOV. 
bool _mapped = false; ///< Whether Map() left the mutex held; consulted by }; diff --git a/libs/render_delegate/render_delegate.cpp b/libs/render_delegate/render_delegate.cpp index 882588f01..0bf2aa99f 100644 --- a/libs/render_delegate/render_delegate.cpp +++ b/libs/render_delegate/render_delegate.cpp @@ -42,6 +42,9 @@ #include #include #include +#include +#include + #ifdef ENABLE_SCENE_INDEX #include #include @@ -392,6 +395,7 @@ const SupportedRenderSettings& _GetSupportedRenderSettings() {str::t_aov_shaders, {"Path to the aov_shaders node graph.", std::string{}}}, {str::t_imager, {"Path to the imagers node graph.", std::string{}}}, {str::t_texture_auto_generate_tx, {"Auto-generate Textures to TX", config.auto_generate_tx}}, + {str::t_fast_viewport, {"Enable fast viewport", config.fast_viewport}}, }; return data; } @@ -735,10 +739,16 @@ void HdArnoldRenderDelegate::_SetRenderSetting(const TfToken& _key, const VtValu auto value = _value.IsHolding() ? VtValue(static_cast(_value.UncheckedGet())) : _value; // Certain applications might pass boolean values via ints or longs. if (key == str::t_enable_gpu_rendering) { - _CheckForBoolValue(value, [&](const bool b) { - AiNodeSetStr(_options, str::render_device, b ? str::GPU : str::CPU); - AiDeviceAutoSelect(GetRenderSession()); - }); + + if (_fastViewport) + AiNodeSetStr(_options, str::render_device, str::GPU); + else + { + _CheckForBoolValue(value, [&](const bool b) { + AiNodeSetStr(_options, str::render_device, b ? 
str::GPU : str::CPU); + AiDeviceAutoSelect(GetRenderSession()); + }); + } } else if (key == str::t_log_verbosity) { if (value.IsHolding()) { _verbosityLogFlags = _GetLogFlagsFromVerbosity(value.UncheckedGet()); @@ -873,6 +883,18 @@ void HdArnoldRenderDelegate::_SetRenderSetting(const TfToken& _key, const VtValu if (value.IsHolding()) { _resolution = value.UncheckedGet(); } + } + else if (key == str::t_fast_viewport) { +#ifdef FAST_VIEWPORT_SUPPORT + _CheckForBoolValue(value, [&](const bool b) { // like the other bool settings: hosts may pass ints/longs + _fastViewport = b; + AiNodeSetBool(_options, str::viewport_rendering, _fastViewport); + AiNodeSetBool(_options, AtString("ignore_dlss"), true); + if (_fastViewport) { + AiNodeSetStr(_options, str::render_device, str::GPU); + } + }); +#endif } else if (key == _tokens->batchCommandLine) { // Solaris-specific command line, it can have an argument "-o output.exr" to override // the output image. We might end up using this for arnold drivers @@ -1357,7 +1379,7 @@ HdBprim* HdArnoldRenderDelegate::CreateBprim(const TfToken& typeId, const SdfPat { // Neither of these will create Arnold nodes. 
if (typeId == HdPrimTypeTokens->renderBuffer) { - return new HdArnoldRenderBuffer(bprimId); + return new HdArnoldRenderBuffer(this, bprimId); } if (typeId == _tokens->openvdbAsset) { return new HdArnoldOpenvdbAsset(this, bprimId); @@ -1894,6 +1916,20 @@ void HdArnoldRenderDelegate::TrackRenderTag(AtNode* node, const TfToken& tag) } } +void HdArnoldRenderDelegate::SetDrivers(HdDriverVector const& drivers) +{ + if (_isBatch) + return; + + for (HdDriver* driver : drivers) { + if (driver != nullptr && driver->name == HgiTokens->renderDriver && + driver->driver.IsHolding()) { + _hgi = driver->driver.UncheckedGet(); + break; + } + } +} + bool HdArnoldRenderDelegate::SetRenderTags(const TfTokenVector& renderTags) { // In this function we store the provided render tags, and we want to return diff --git a/libs/render_delegate/render_delegate.h b/libs/render_delegate/render_delegate.h index 6662285d7..6440ab022 100644 --- a/libs/render_delegate/render_delegate.h +++ b/libs/render_delegate/render_delegate.h @@ -43,9 +43,10 @@ #include #include #include +#include #include - +#include #include "hdarnold.h" #include "render_param.h" #include "api_adapter.h" @@ -56,7 +57,7 @@ class HydraArnoldReader; PXR_NAMESPACE_OPEN_SCOPE - +class HdArnoldRenderBuffer; struct HdArnoldRenderVar { /// Settings for the RenderVar. HdAovSettingsMap settings; @@ -548,6 +549,16 @@ class HdArnoldRenderDelegate final : public HdRenderDelegate { bool IsBatchContext() const {return _isBatch;} + /// Receives the host application's Hgi instance via the standard + /// HdRenderDelegate driver interface. Stored as a borrowed pointer. + HDARNOLD_API + void SetDrivers(HdDriverVector const& drivers) override; + + /// Returns the borrowed Hgi instance, or nullptr if the host application + /// did not provide one (e.g. batch / husk without a GL context). 
+ Hgi* GetHgi() const { return _hgi; } + + HydraArnoldAPI &GetAPIAdapter() {return _apiAdapter;} /// @brief Get the procedural parent @@ -716,6 +727,7 @@ class HdArnoldRenderDelegate final : public HdRenderDelegate { void SetHasCryptomatte(bool b); void SetInstancerCryptoOffset(AtNode *node, size_t numInstances); + bool IsFastViewport() const {return _fastViewport;} bool IsUsingHydraRenderSettings() const {return _useHydraRenderSettings;} private: @@ -846,9 +858,13 @@ class HdArnoldRenderDelegate final : public HdRenderDelegate { bool _forceIgnoreMotionBlur = false; bool _useHydraRenderSettings = false; std::unordered_map _nodeNames; + bool _fastViewport = false; + Hgi* _hgi = nullptr; ///< Borrowed pointer to the host application's Hgi (set via SetDrivers). + mutable std::mutex _nodeGraphNamesMutex; std::unordered_map _nodeGraphNames; + // We store a list of functions that must be run once all the prims are synced // They will be ran in HasPendingChanges std::mutex _deferredFunctionCallsMutex; diff --git a/libs/render_delegate/render_pass.cpp b/libs/render_delegate/render_pass.cpp index d01342576..3fea3e526 100644 --- a/libs/render_delegate/render_pass.cpp +++ b/libs/render_delegate/render_pass.cpp @@ -623,6 +623,8 @@ void HdArnoldRenderPass::_Execute(const HdRenderPassStateSharedPtr& renderPassSt } const bool framingChanged = newFraming != _framing; + const bool fastViewportChanged = _fastViewport != _renderDelegate->IsFastViewport(); + GfVec4f windowNDC = _renderDelegate->GetWindowNDC(); float pixelAspectRatio = _renderDelegate->GetPixelAspectRatio(); // check if we have a non-default window @@ -639,17 +641,20 @@ void HdArnoldRenderPass::_Execute(const HdRenderPassStateSharedPtr& renderPassSt auto clearBuffers = [&](HdArnoldRenderBufferStorage& storage, bool allocate, int w, int h) { static std::vector zeroData; zeroData.resize(w * h * 4); + for (auto& buffer : storage) { HdArnoldRenderBuffer *renderBuffer = buffer.second.buffer; if (renderBuffer != nullptr && 
!renderBuffer->IsEmpty()) { + renderBuffer->SetHgi(_renderDelegate->IsFastViewport() ? _renderDelegate->GetHgi() : nullptr); if (allocate && (renderBuffer->GetWidth() != w || renderBuffer->GetHeight() != h)) renderBuffer->Allocate(GfVec3i(w, h, 0), renderBuffer->GetFormat(), renderBuffer->IsMultiSampled()); + renderBuffer->WriteBucket(0, 0, w, h, HdFormatUNorm8Vec4, zeroData.data()); } } }; - if (framingChanged) { + if (framingChanged || fastViewportChanged) { // The render resolution has changed, we need to update the arnold options renderParam->Interrupt(true, false); _framing = newFraming; @@ -657,6 +662,7 @@ void HdArnoldRenderPass::_Execute(const HdRenderPassStateSharedPtr& renderPassSt AiNodeSetInt(options, str::xres, width); AiNodeSetInt(options, str::yres, height); + _fastViewport = _renderDelegate->IsFastViewport(); clearBuffers(_renderBuffers, true, width, height); AiNodeSetInt(options, str::region_min_x, _framing.dataWindow.GetMinX()); AiNodeSetInt(options, str::region_max_x, _framing.dataWindow.GetMaxX()); @@ -919,15 +925,19 @@ void HdArnoldRenderPass::_Execute(const HdRenderPassStateSharedPtr& renderPassSt if (isRaw && sourceName == HdAovTokens->color) { output = AtString{TfStringPrintf("RGBA RGBA %s %s", filterName, mainDriverName).c_str()}; AiNodeSetPtr(_mainDriver, str::color_pointer, binding.renderBuffer); + buffer.buffer->SetAovName(_fastViewport ? 
str::t_final_output : str::t_RGBA); } else if (isRaw && sourceName == HdAovTokens->depth) { output = AtString{TfStringPrintf("%s %s %s", _depthOutputValue, filterGeoName, mainDriverName).c_str()}; AiNodeSetPtr(_mainDriver, str::depth_pointer, binding.renderBuffer); + buffer.buffer->SetAovName(str::t_Z); } else if (isRaw && sourceName == HdAovTokens->primId) { aovShaders.push_back(_primIdWriter); output = AtString{TfStringPrintf("%s INT %s %s", str::hydraPrimId.c_str(), filterGeoName, mainDriverName) .c_str()}; AiNodeSetPtr(_mainDriver, str::id_pointer, binding.renderBuffer); + buffer.buffer->SetAovName(str::t_id); + } else { // Querying the data format from USD, with a default value of color3f. TfToken format = _GetOptionalSetting( @@ -1015,6 +1025,7 @@ void HdArnoldRenderPass::_Execute(const HdRenderPassStateSharedPtr& renderPassSt "%s %s %s %s %s", aovName, arnoldTypes.outputString, filterName, mainDriverName, layerName.c_str()) .c_str()}; + buffer.buffer->SetAovName(TfToken(layerName.c_str())); } outputs.push_back(output); } diff --git a/libs/render_delegate/render_pass.h b/libs/render_delegate/render_pass.h index de349b9e8..52862acfb 100755 --- a/libs/render_delegate/render_pass.h +++ b/libs/render_delegate/render_pass.h @@ -141,6 +141,7 @@ class HdArnoldRenderPass : public HdRenderPass { // Window NDC region, that can be used for overscan, or to adjust the frustum GfVec4f _windowNDC = GfVec4f(0.f, 0.f, 1.f, 1.f); + bool _fastViewport = false; bool _isConverged = false; ///< State of the render convergence. 
}; diff --git a/libs/translator/CMakeLists.txt b/libs/translator/CMakeLists.txt index 022d34999..45ea9d7c0 100644 --- a/libs/translator/CMakeLists.txt +++ b/libs/translator/CMakeLists.txt @@ -97,7 +97,7 @@ else () # USD shared lib build if (LINUX) target_link_libraries(translator INTERFACE dl pthread) endif () - target_link_libraries(translator INTERFACE gf sdf tf usd ar arch pcp vt work) + target_link_libraries(translator INTERFACE gf sdf tf usd ar arch pcp vt work hgi hgiGL js) target_link_libraries(translator INTERFACE usdGeom usdShade usdUtils usdLux usdVol usdSkel usdRender) endif () endif () diff --git a/tools/utils/dependencies.py b/tools/utils/dependencies.py index aea6d9b74..aec9da047 100644 --- a/tools/utils/dependencies.py +++ b/tools/utils/dependencies.py @@ -78,6 +78,11 @@ def render_delegate(env, sources): usd_libs += ['boost','python',] if env['USD_VERSION_INT'] >= 2505: usd_libs += ['hdsi','ts',] + + # needed for fast viewport code path + if env['USD_BUILD_MODE'] != 'static': + usd_libs += ['hgi', 'hgiGL', 'garch'] + return add_plugin_deps(env, sources, usd_libs, True)