|
21 | 21 |
|
22 | 22 | <meta name="description" content="策略评估的方式 如何不断的改进策略?用什么来评估:答案是基于return进行评估。 return是一条控制轨迹能够获得的奖励或折扣奖励的和,能够用来评估当前状态的价值。 状态的价值 举个例子,在Grid机器人里,现在有3条路径: 路径1:智能体从S1到S3,奖励是0,从S3到S4,奖励是1,呆在S4,一直有奖励1. 路径2:智能体从S1到S2,奖励是-1,从S2到S4,奖励是1,呆在"> |
23 | 23 | <meta property="og:type" content="article"> |
24 | | -<meta property="og:title" content="强化学习学习笔记(二)贝尔曼方程"> |
25 | | -<meta property="og:url" content="https://runsstudio.github.io/2026/01/04/%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%EF%BC%88%E4%BA%8C%EF%BC%89%E8%B4%9D%E5%B0%94%E6%9B%BC%E6%96%B9%E7%A8%8B/index.html"> |
| 24 | +<meta property="og:title" content="强化学习笔记(二)贝尔曼方程"> |
| 25 | +<meta property="og:url" content="https://runsstudio.github.io/2026/01/04/%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%EF%BC%88%E4%BA%8C%EF%BC%89%E8%B4%9D%E5%B0%94%E6%9B%BC%E6%96%B9%E7%A8%8B/index.html"> |
26 | 26 | <meta property="og:site_name" content="Run's Studio"> |
27 | 27 | <meta property="og:description" content="策略评估的方式 如何不断的改进策略?用什么来评估:答案是基于return进行评估。 return是一条控制轨迹能够获得的奖励或折扣奖励的和,能够用来评估当前状态的价值。 状态的价值 举个例子,在Grid机器人里,现在有3条路径: 路径1:智能体从S1到S3,奖励是0,从S3到S4,奖励是1,呆在S4,一直有奖励1. 路径2:智能体从S1到S2,奖励是-1,从S2到S4,奖励是1,呆在"> |
28 | 28 | <meta property="og:locale" content="zh_CN"> |
29 | | -<meta property="og:image" content="https://runsstudio.github.io/2026/01/04/%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%EF%BC%88%E4%BA%8C%EF%BC%89%E8%B4%9D%E5%B0%94%E6%9B%BC%E6%96%B9%E7%A8%8B/image.png"> |
30 | | -<meta property="og:image" content="https://runsstudio.github.io/2026/01/04/%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%EF%BC%88%E4%BA%8C%EF%BC%89%E8%B4%9D%E5%B0%94%E6%9B%BC%E6%96%B9%E7%A8%8B/image-1.png"> |
31 | | -<meta property="og:image" content="https://runsstudio.github.io/2026/01/04/%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%EF%BC%88%E4%BA%8C%EF%BC%89%E8%B4%9D%E5%B0%94%E6%9B%BC%E6%96%B9%E7%A8%8B/image-3.png"> |
| 29 | +<meta property="og:image" content="https://runsstudio.github.io/2026/01/04/%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%EF%BC%88%E4%BA%8C%EF%BC%89%E8%B4%9D%E5%B0%94%E6%9B%BC%E6%96%B9%E7%A8%8B/image.png"> |
| 30 | +<meta property="og:image" content="https://runsstudio.github.io/2026/01/04/%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%EF%BC%88%E4%BA%8C%EF%BC%89%E8%B4%9D%E5%B0%94%E6%9B%BC%E6%96%B9%E7%A8%8B/image-1.png"> |
| 31 | +<meta property="og:image" content="https://runsstudio.github.io/2026/01/04/%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%EF%BC%88%E4%BA%8C%EF%BC%89%E8%B4%9D%E5%B0%94%E6%9B%BC%E6%96%B9%E7%A8%8B/image-3.png"> |
32 | 32 | <meta property="article:published_time" content="2026-01-04T12:10:57.000Z"> |
33 | | -<meta property="article:modified_time" content="2026-01-04T13:06:25.516Z"> |
| 33 | +<meta property="article:modified_time" content="2026-02-25T03:17:09.083Z"> |
34 | 34 | <meta property="article:tag" content="交通"> |
35 | 35 | <meta name="twitter:card" content="summary_large_image"> |
36 | | -<meta name="twitter:image" content="https://runsstudio.github.io/2026/01/04/%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%EF%BC%88%E4%BA%8C%EF%BC%89%E8%B4%9D%E5%B0%94%E6%9B%BC%E6%96%B9%E7%A8%8B/image.png"> |
| 36 | +<meta name="twitter:image" content="https://runsstudio.github.io/2026/01/04/%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%EF%BC%88%E4%BA%8C%EF%BC%89%E8%B4%9D%E5%B0%94%E6%9B%BC%E6%96%B9%E7%A8%8B/image.png"> |
37 | 37 |
|
38 | 38 |
|
39 | 39 |
|
40 | | - <title>强化学习学习笔记(二)贝尔曼方程 - Run's Studio</title> |
| 40 | + <title>强化学习笔记(二)贝尔曼方程 - Run's Studio</title> |
41 | 41 |
|
42 | 42 | <link rel="stylesheet" href="https://lib.baomitu.com/twitter-bootstrap/4.6.1/css/bootstrap.min.css" /> |
43 | 43 |
|
|
205 | 205 | <div class="banner-text text-center fade-in-up"> |
206 | 206 | <div class="h2"> |
207 | 207 |
|
208 | | - <span id="subtitle" data-typed-text="强化学习学习笔记(二)贝尔曼方程"></span> |
| 208 | + <span id="subtitle" data-typed-text="强化学习笔记(二)贝尔曼方程"></span> |
209 | 209 |
|
210 | 210 | </div> |
211 | 211 |
|
|
276 | 276 | <div class="container nopadding-x-md" id="board-ctn"> |
277 | 277 | <div id="board"> |
278 | 278 | <article class="post-content mx-auto"> |
279 | | - <h1 id="seo-header">强化学习学习笔记(二)贝尔曼方程</h1> |
| 279 | + <h1 id="seo-header">强化学习笔记(二)贝尔曼方程</h1> |
280 | 280 |
|
281 | 281 |
|
282 | 282 | <div class="markdown-body"> |
@@ -410,8 +410,8 @@ <h1 id="状态的价值">状态的价值</h1> |
410 | 410 |
|
411 | 411 | <div class="license-box my-3"> |
412 | 412 | <div class="license-title"> |
413 | | - <div>强化学习学习笔记(二)贝尔曼方程</div> |
414 | | - <div>https://runsstudio.github.io/2026/01/04/强化学习学习笔记(二)贝尔曼方程/</div> |
| 413 | + <div>强化学习笔记(二)贝尔曼方程</div> |
| 414 | + <div>https://runsstudio.github.io/2026/01/04/强化学习笔记(二)贝尔曼方程/</div> |
415 | 415 | </div> |
416 | 416 | <div class="license-meta"> |
417 | 417 |
|
@@ -450,9 +450,9 @@ <h1 id="状态的价值">状态的价值</h1> |
450 | 450 | <article class="post-prev col-6"> |
451 | 451 |
|
452 | 452 |
|
453 | | - <a href="/2026/01/04/%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%EF%BC%88%E4%B8%80%EF%BC%89%E5%9F%BA%E7%A1%80%E6%A6%82%E5%BF%B5/" title="强化学习学习笔记(一)基础概念"> |
| 453 | + <a href="/2026/01/04/%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%EF%BC%88%E4%B8%80%EF%BC%89%E5%9F%BA%E7%A1%80%E6%A6%82%E5%BF%B5/" title="强化学习笔记(一)基础概念"> |
454 | 454 | <i class="iconfont icon-arrowleft"></i> |
455 | | - <span class="hidden-mobile">强化学习学习笔记(一)基础概念</span> |
| 455 | + <span class="hidden-mobile">强化学习笔记(一)基础概念</span> |
456 | 456 | <span class="visible-mobile">上一篇</span> |
457 | 457 | </a> |
458 | 458 |
|
|
0 commit comments