diff --git a/.gitignore b/.gitignore index 6208c37c43..fa897baf9c 100644 --- a/.gitignore +++ b/.gitignore @@ -22,6 +22,7 @@ yarn-error.log* .idea *.info.mdx +*.pyc .tool-versions @@ -30,6 +31,7 @@ yarn-error.log* # Environment variables .env +**/cookies.txt stellar-cli-repo diff --git a/docusaurus.config.ts b/docusaurus.config.ts index 3f7570a17f..4eb17f3ffd 100644 --- a/docusaurus.config.ts +++ b/docusaurus.config.ts @@ -108,6 +108,7 @@ const config: Config = { postsPerPage: 12, routeBasePath: 'meetings', onUntruncatedBlogPosts: 'ignore', + exclude: ['**/README.md'], }, docs: { showLastUpdateTime: true, diff --git a/meeting-notes/authors.yml b/meeting-notes/authors.yml index 9d92d724b3..eb27b21f5b 100644 --- a/meeting-notes/authors.yml +++ b/meeting-notes/authors.yml @@ -49,6 +49,9 @@ julian: image_url: https://github.com/Julian-dev28.png socials: github: Julian-dev28 +placeholder: + name: Unknown + image_url: /img/docusaurus/authors/nobody.svg kaankacar: name: Kaan Kacar title: Developer Advocate diff --git a/meetings/2023-05-04.mdx b/meetings/2023-05-04.mdx new file mode 100644 index 0000000000..855b332bfc --- /dev/null +++ b/meetings/2023-05-04.mdx @@ -0,0 +1,146 @@ +--- +title: "Soroban Design Discussion 5/4/2023" +description: "Soroban is a smart contracts platform that is designed with purpose and built to perform. Learn more about Soroban here: https://soroban.stellar.org/" +authors: + - placeholder +tags: [developer] +--- + +import YouTube from "@site/src/components/YouTube"; + + + +### Key Points + +- And so I think, You know, I think there were talks last. +- Discussion focused on contract. +- Discussion focused on UN. +- Discussion focused on Ledger. + + +### Resources + + + +
+ Video Transcript + +[00:00] So, without further ado, we're going to keep talking about state expiration today. And I think Garand dropped a bit of a bomb a couple of days ago. So I think we're gonna start with that, and Garand, if you can give us a quick overview of some of the suggested changes. Yeah, sure. So up until this point, the current Legacy expiration proposal has had this thing called a rent balance. So, essentially, what you would do is that, whenever you, you know, create an entry or want to do a rent bump, you would put XLM into this rent balance, and then every ledger a variable amount of a fee called the rent fee would be removed from this rent balance, and the rent fee could either decrease or increase based on the size of the bucket list and network usage. So the reasoning behind this initially was that we want to provide some sort of market equilibrium. That as the bucket list gets larger and as the price of writing to the bucket list increases, we + +[01:00] also have increased pressure to evict and expire entries out of the bucket list. And so the thinking with this kind of variable rent fee is that, as the bucket list grows, the rent fee also grows, and so you deduct rent balance more aggressively, and so more entries will fall to zero or negative rent balances and then be expired more frequently. So that's kind of the original thinking as to why we had this variable rent balance and this variable rent fee. Now the issue with that is there are two primary issues. The first is just the user experience story. So with rent balance, because the rent fee that's deducted is variable from ledger to ledger, you can estimate how many ledgers you think your entry will be live on the
network, but you can't know for certain. So, for instance, say if you initially paid for 100 ledgers of rent, but you know there are a bunch of merges, so the bucket list size decreased a lot. Then your entry could live much longer. Conversely, if you paid for, say, 10 years of rent, your entry might only last one year if the network explodes + +[02:00] in popularity and the bucket list size increases. And so it's kind of a weird user interface where the users expect an entry's lifetime to be measured in ledgers, but really you have like this rent balance and like this variable thing you really can't control. And so that was issue number one: it was difficult for users to understand, and it didn't have a great user story. The second issue is that of downstream systems. And so because the rent fee is variable from ledger to ledger, you don't know when, or you can't predict what the rent fee will be, and so you cannot predict an entry's rent balance. Now in Stellar Core, we have, like, this bucket list data structure, which is, like, you know, specially designed with these multiple levels and like this log-structured merge tree approach, and we've designed the rent balance system to work very well with the bucket list, such that on bucket list and on bucket list DB you always know exactly how much rent balance your entry has, and we can do this efficiently without iterating through every entry and then, you know, decrementing its rent balance by the current ledger's rent fee. We + +[03:00] can, like, you know, get around it using some optimizations of the bucket list structure. Now the issue for downstream systems is that downstream systems don't have a copy of the bucket list.
You know, they're running captive core, but the way that captive core is currently designed, it's too expensive to query a captive core directly. And so that means that in order for these downstream systems, primarily Horizon and Soroban RPC nodes, to have accurate rent balance information, what they would need is to essentially either maintain their own version of the bucket list and apply rent via, like, a copy of the bucket list, or do some, like, very inefficient operations with, like, SQL where you, like, essentially cache some amount of rent fees and then lazily apply them in the background, or something like that. But in either case it's a lot of work for downstream systems: a lot of disk I/O and things like that. So those are the two main drawbacks. And so when thinking about this issue, we thought from first principles: okay, do we + +[04:00] need this variable rent fee? Do we need a rent balance, or can we use a definitive expiration ledger? And so that's kind of what today's conversation is about: rent balance versus expiration ledger. Now how the expiration ledger works is, instead of having the ledger entry store a rent balance field that is periodically deducted from, all it stores is a single entry, which is the expiration ledger. It's very simple. Essentially, before the expiration ledger, the entry is live and accessible. After the expiration ledger, the entry has run out of rent and is not accessible. And then, once the entry is past this expiration ledger, it can be expired, which is when it's deleted from the bucket list. And then, if it's a temporary entry, it could be permanently deleted, or, if it's a restorable entry, it's deleted and then sent to a state restoration node or something like that.
Now the advantages to the expiration ledger approach are that it's significantly easier for downstream systems, because they don't have to periodically update entries with rent + +[05:00] balance information. The meta that's initially emitted whenever an entry is created or an entry receives a rent bump will tell downstream systems exactly when an entry should expire, and so it's pretty straightforward from the implementation standpoint. It also makes a lot more sense for users, who expect that a lifetime is measured in kind of ledgers, discrete and predictable values. Now, the drawback to this is that we can't have this dynamic eviction pressure feature that we have in rent balance. So how the expiration ledger implementation will work with respect to fees is, what we would do is take whatever the current market rent fee rate is based on the size of the bucket list, and then lock that rate in and charge that rate for the entire lifetime of the object or of the entry. So, for instance, say you are creating a new entry that has one year's worth of rent: you would be charged one year's worth of rent at the current rent fee. Now, the issue with this is, suppose that within that year the bucket list size + +[06:00] increases significantly. Essentially, what you would be doing is, because you locked in that rate when the size was small, you'd be paying an artificially low rate compared to other entries that are being added onto the network later. And so you have this weird system where these entries can be kind of grandfathered in, so to speak, such that they pay low fees because they got in at the ground level, when the network is low, and so with rent balances, you don't have
This grandfather thing, because, even if you create an entry on Day Zero answer, up on launch day, when the buck list was at, you know theoretical smallest size Because every entry is subject to the same variable rent fee, no matter when-same variable rent fee, no matter when they were created the old entries, and new entries are charged the same rate. however, because you have to essentially, lock in the rate on creation time around. bump time with an expiration Ledger, approach. That's not possible And so I think what we want to think, about if we want to make expiration, letters work, because I think the two benefits to Downstream systems and to the user experience are very, very significant benefits, and so we should. + +[07:00] Significant benefits, and so we should be significant benefits, and so we should think about expiration Ledges, pretty seriously, seriously, seriously. I think that we need to make sure I, think the thing we want to prevent most, are kind of two scenarios first. Where you have a system where users or there's, smart Contracting can essentially provide a storage interface for cheaper, than the protocol can. If you can imagine say, on day Zero when the bucket list is, small, someone spins up a smart contract that has public functions, that exactly mirror the storage functions of that, the protocol exposes. So you have, like you know, get you know. Create just the exact same, interface. But instead of calling it directly through the Saroyan SDK, you would just make a call to a Smart, contract for your storage needs, and so what this storage contract could do is it could just, you know, buy up, say the. maximum amount of Ledger entries. possible, possible on day Zero with say like 100 years of rent or some very large value and essentially permanently lock in those entries for 100 years at the lowest possible rent fee. And then say two or + +[08:00] possible rent fee and then say two or three years down the line. 
You know if the Stellar Network explodes, and huge, the rent fee will be significantly, higher. And so what this contract can do is essentially auction off this rent is essentially auctioned off. This rent is essentially auction off this rent space at an artificially low rate, and so you can essentially use a middleman. contract to get cheaper storage than you could if you actually went to the protocol directly. And so this is really protocol directly. And so this is terrible from a Network Health perspective, bad from a Network Health perspective because essentially every storage call, now has to pass this middleman and so you have lots of additional traffic lots. of additional, overhead for serving storage, just because essentially you have a bug and exploitability in the way that you charge rent, and so I think to prevent against that, specific exploit where you have like these, this storage contract, these, these storage contract interfaces, what you would need is some sort of upper bound on the amount of Ledger or the lifetime of an entry at any given point. Essentially the thinking, being that you know if you allow arbitrarily large amounts, of rent purchases, then you could say: + +[09:00] of rent purchases, then you could say: lock in an entry for 100 years, which is far too long, because the price will probably significantly increase in that, lifetime. But if you have a maximum lifetime of, say, six months, it's unlikely that a storage contract interface could be profitable with only six months, difference between the initial rent fee, paid and then the rent you'd be providing their users. And so I think the plan is, if we do expiration ledgers is to have a network parameter which is the. maximum rent, or the maximum lifetime of a given entry on The Ledger, and so this, could be something like six months, or one year, and this would be a network parameter. 
And so what we can do is you want this value to be as large, as possible from a user perspective, just so you can provide the most flexibility, and usability, but you want to be small enough such that you don't have these rent related exploits, and so that would be a number that we can change by a BS a number that we can change by a network vote, just so that we can essentially tune this value up or down, as we see exploits happening on the network, or as we see that no exploits are happening on the network. + +[10:00] Are happening on the network in addition to this issue. We also kind of like the middleman storage, contract. We also want to make sure that we're not allowing a bunch of spam. Entries to lock in very low rates, take up space on the network and essentially up the price for everyone else. That's doing legitimate work. That's not spam. And so you can imagine that on Day Zero. when the know buckless has a small, You have a bunch of airdrops that take. whatever the maximum lifetime is, say six. months, a year or whatever. And then essentially just mint spam airdrop, tokens, and tell the bucket list size increases such that's no longer profitable, profitable, profitable. Now. The issue with this is that they can essentially, even though you know each additional spam token, they put on the network raises the rent fees slightly. for the next spam token. They can still do this very fast and very quickly. Take, up the cheapest rent options with all of this spam and unlike an event balance, this spam, and unlike an event balance-this spam. And unlike an event balance approach, even though the spam has caused the rent fees to be high for everyone, else, because the spam did it first. They + +[11:00] else because the spam did it first. They are not subject to those higher fees and you can't evict them. Which means you can, have like this essentially. 
These events where if, for whatever reason, the bucket list size decreases and the rent fee decreases rapidly, you can have all these spam tokens kind of come in and fill the gap very quickly, get the network up to that, you know, high rent fee rate again, at the cost of all of these spam entries taking up more time than they probably should. And so I think that's probably where I want to get the conversation started and open up the floor for questions, but those are the biggest pros and cons of each: the pros of rent balance being, you know, this eviction pressure and defeating these exploits natively, without, like, the need for, like, a maximum rent balance or something like that; the con being downstream systems and usability. Garand, there was a question around the downstream systems. Can you just quickly outline the difference, or specifically with expiration ledger, what would be the downstream systems' + +[12:00] kind of, like, expectations? Yeah, so I think the issue with the rent balance versus expiration ledger is just that an expiration ledger we can put in meta, so that the downstream systems can be directly told what the expiration ledger is, but there's no way to emit meta for the variable rent balance bumps, just because they're too frequent. So, for instance, with expiration ledger, just because it's set a single time on creation, or whenever an entry has its expiration or its lifetime extended, what you can do is just emit meta that says, okay, this key has this lifetime. And then they store it in a SQL database, and then whenever you access that entry, you can just spit out the lifetime, easy, and so you can contain all that information in meta. With rent balance, the issue is the initial rent balance is in the meta, sure, so say, like, this has a starting value of a thousand XLM.
But the issue is, every ledger has a variable amount that's deducted from that thousand XLM. And because every single ledger entry is subject to this variable rent + +[13:00] every single ledger close, what we'd have to do, to contain this information in meta, is literally submit a meta entry for every single entry on the ledger periodically with the updated rent information. And that's just not possible. And so, essentially the downstream systems would have to manage this rent balance themselves, and we could probably emit the variable rent fee as meta every ledger close, but then it would be the responsibility of the downstream systems to implement essentially the rent balance bookkeeping themselves, if that makes sense. It does sound, for the most part, like a win-win. So I definitely want to probably focus on, like, what are the pros of the current rent balance system compared to expiration ledger? + +[14:00] Yes, I think it comes down to eviction pressure and kind of not being exploitable, or being more game-proof. And so that's the primary issue with locking in essentially a rent fee at creation time: you open up these vulnerabilities for the kind of, like, contract storage middleman and for long-lived spam entries that increase rent fees for everyone else. And so those are the two kind of, I think, exploits that we want to prevent happening. And so, if we can find essentially suitable limitations on the expiration ledger, like one limitation being not allowing arbitrarily large lifetimes, to defeat those two exploits, then I think it would be a good decision. There are also a couple of other drawbacks. I think these are all solvable drawbacks.
Furthermore, I think these are all solvable problems, but just something to think. about. Is that there are certain issues like: because you're locking in a like, because you're locking in an expiration Ledger, expiration Ledger-there are questions about what to do. There are questions about what to do whenever you resize an entry, so for + +[15:00] whenever you resize an entry. So for, instance, say, you create an entry, that's only one byte large and say, okay, I want it to have an expiration Ledger 10 years. from now. And so you pay the rent fee for 10 years, but for only one byte And then two ledgers later, you say oh, this entry is now 100 kilobytes, Then you essentially need to, reconcile. Okay, what do we do in this? situation? Do we either a shorted the lifetime because the rent fee is now higher, because it's you know, a larger, entry size? just for reference. The rent fee is charged per byte and so you need to pay more rent in order to have the same expiration Ledger you did before and so my current thinking for that particular, Edge case, is that the expiration Ledger should never decrease. For instance, I should never decrease. For instance, a White or an update or changing the size: write or an update or changing the size of an entry should never decrease it's of an entry should never decrease its lifetime. It should only match or increase the lifetime, which means that if you have an entry that has say 10, if you have an entry that has said 10 years worth of or that has an expiration Ledger, 10. + +[16:00] Or that has an expiration Ledger 10, years in the future. If you resize that entry, then you, what you need to do is you need to pay for essentially the. difference, so you'd have to pay in this example, if you had a one byte entry and now it's a know 10 byte entry, you'd- have to pay for 10 bytes or for the nine, Additional bytes, Whenever you do that update, and so I, think there are like issues like that. 
and then the two exploit, exploitability cases, that rent balance, solves really elegantly because rent balance, you know it's charged every Ledger. And so if you resize it then they'll the know new, Additional fees will just be picked up. on the next Ledger, and so it's handled automatically. And then these exploits are not possible, just because your rent for your paying is always up-to-date, and so you can never game the up to date. and so you can never game the system by locking in a rent fee early and then using it later when the rent fee is more expensive. + +[17:00] Is there anyone on stage or in the? audience that want to speak in favor of rent balances? Okay, okay. So yeah, I just want to comment, on this from a, product perspective. I definitely think that this is a big win because the user is this is a big win because the user experience of the previous proposal, experience of the previous proposal-red balances is definitely kind of red balances, is definitely kind of like requires a bit of a complex mental model of what rent actually means, Because when people you know pay rent, they are used to kind of like locking a. specific grade for a specific given of time, a specific given amount of time so I do think that this new proposal, kind of like sits better with like a mental model of what rent actually is, even though we're not using the word rent, hear hear. + +[18:00] One question that I also asked on Discord, Discord, Discord is around a. the question of temporary entries. And to the question of like Auto bumps, and what is, what are the implications for these Yeah, so I think my current thinking is: that we should still have Auto bumps, but the auto bump should be optional, and optional-well, let me explain what. And optional, well, let me explain what I mean by optional. And so I think in this system, we still want an auto bump. 
system such that you know frequently, used entries, and shared entries, such as contract instances and contract code was mare paid for and so that was envisioning is that before we were like bumping by some, you know amount of elm now because we have this expiration Ledger interface, we would just Bump by some modest amount, like 10 ledgers per access, automatically, and some of that would look like. Is that you would pay at the current market rate. whatever the current rent fee is for. + +[19:00] Whatever the current rent fee is for, the extension. And so even if the entries say had like three years worth of rent, that was paid for two years ago, so it was very cheap. Whenever you access, it, you would still need to pay for the. additional 10 ledgers at the current market rate, And so that's for automatic bumps I, think, think, think what we should do for auto bumps is especially for temporary entries. There, were use cases where other bumps are, useful and use cases where Auto bumps are not. And so I think, when creating and are not. And so I think, when creating an entry, what you should do is on the initial item creation. You can set a flag-initial item creation. You can set a flag, and that flag is either Auto bumps-true, false, and so what this allows you to do is that the original developer, when you're creating the entry, can choose if this entry is something that should be bumped, or something that is like you know, short-lived, so it should expire, and then whenever you access that entry, that flag is stored in The Ledger entry and so the Access Auto bump is determined by. that initial create time flag. And so I that initial creates time flag. And so I think Auto bumps are optional, but they're not optional by The Entity. that's accessing the entry. they are. + +[20:00] That's accessing the entry. They're optional, based on the entity that's creating the entry. If that makes sense, and so I think that makes the most sense. 
Now another thing for temporary entries. is because we now have this expiration, Ledger. I know before we kind of went back and forth as to you know if we should have expert or temporary entries, with like firm or exact, cut-offs or not, or like that expire on the exact Ledger entries. And so I think under this system, now that we don't use rent balances, that should be very possible and easy to. do, such that you can now use temporary entries for security features, such that if you want an entry that lasts exactly, 100 ledgers, what you would do is just say: make a temporary entry, set the expiration ledger to you know, or set the TTL to 100 and then set auto bump to false, false, false. Now I think, we still need to actually, I still, need to think about that a little bit, more as to if we can, because right. now we are allowing both temporary and + +[21:00] now we are allowing both temporary, and restorable entries to be bumped by anyone anytime. And so I take back that said they still might not be appropriate, for security uses out of the box, But so what I'm kind of envisioning, right now is the autobump flag and then in addition to the automatic bumps on, access, which both temporary and restorable entries, restorable entries-have you can also still manually have, you can also still manually bump any entry, both temporary, and restorable, restorable, restorable via an operation and that operation is similar to what was in rent balance, But now you just specify the new. expiration Ledger, and then my thinking-expiration Ledger. And then my thinking is that whenever you bump, or whenever you pay for more, you can. Either there are two options here. Sorry, October, were you saying something? No, sorry, oh so I think there's whenever you. do a manual bump operation to extend the expiration Ledger. There are two ways we could think about it. First you could. 
either be credited for the amount you've, already paid and then view it as an + +[22:00] already paid and then view it as an extension. So, for instance, say you know I have a ledger that's set. to expire in a year and I want to expire, it to expire in 18 months. One potential solution would be the rent bump operation only charges you at the current market rate for six more months, of rent. It says, hey, there's already you know 12 months of rent here. So we're known 12 months of rent here. So we're only going to charge you for the additional six. So the total comes out to 18 months. That's option one. Now the drawback to that option is again for. that first 12 months. You're locking in a. lower rate, in that last six months is now at the market rate. So that's option one option: two is that whenever you do a manual. bump, you don't count the previous balance and the previous balance is burned, burned, burned. What this would be is that, if you, know there's an entry that you want to live 18 months, but it currently lives. It lives 18 months but it currently lives. It currently has an expiration. That's only 12 months in the future. You have to pay 18 months of rent at the current market. + +[23:00] 18 months of rent at the current market price and then the expiration Ledger is reset and that in this way, essentially the 12 months that it was already there, is burned, and you're charged for the entire 18 months of the market rate, Now, the advantage to this, is, again, I think, for a Network Health perspective, and to prevent capability, we want to be charging as close to Market, rates whenever we can, and so in these rates whenever we can, and so in this system, you always can be charging you know the market rate. However, it's kind of a poor UX because you're burning this amount, amount, amount. It seems that, like in something like, this should be probably strictly, additive. 
And so I guess, as far as the two interfaces are concerned, what are your thoughts as two manual rent? Bumps? That was a lot. I just want to go back. to a point you made earlier, quickly about temporary entries. So if I understand correctly, you are suggesting: that the temporary entries, if we do go for Ledger, expert for expiration, Ledger, the, the. + +[24:00] Ledger, the, the the contract should be the same, that is, we're kind of like letting go of the whole, whole, short, medium, long terms and making it the exact same interface as restorable, Ledger entries: yeah, I think so. Now the one thing though: is I initially said they were fit for us. I initially said they were fit for security uses, but then I caught myself: because, right now, any user can bump any. entry, which means that even if you have, say like, a KYC entry, that's only supposed to last 128 ledgers, and you initially set its expiration alleged to be 128 ledgers in the future. A malicious user could bump that using the manual operation. And so I think for security purposes, We still need to. That still needs to. be enforced at the contract level, whereas, like in this case, the contract. would need to embed their own like TTL inside the temporary entry, just because you have this arbitrary, bump, I mean I guess we could have a flag that's like no bump to. + +[25:00] Have a flag that's like no bump to Temporary entries, but that might be adding too many, flags at this point. Because the current interfaces: anyone can bump anything. Okay, thanks for answering that. question. Going back to Auto bumps for a second: I do think you know I've been thinking about the, the wallet and DAP, experience, experience, experience and what they need to think about in terms of State expiration, and it does seem like autobump doesn't, negate the need for wallets and apps, to actually be attentive to The Ledger, expiration times and to act on that. 
Either by, like, suggesting the user do, like, a manual bump or to initiate a manual bump. So it does seem like auto bump has this, like, implicit behavior to it, in which we're, like, extending ledger entry expiration times, but it doesn't + +[26:00] actually — it, like, adds complexity to the system, but it doesn't actually remove complexity from the implementation of, like, products. It makes things a bit less expected and predictable. So I think — yeah, go ahead. Well, I think the auto bumps serve a very specific purpose, or at least they were initially designed to serve a specific purpose, and this is the shared state. So, from your example, like a dapp or like a wallet, right, this is not a good example for auto bumps, because you have a particular balance entry that you care about and no one else really cares about. And so from a wallet perspective, say you want your wallet to live for one year: the correct answer is not just to keep accessing it via a smart contract so that you auto bump it, but rather just to do a manual operation and then to bump that by a year. The use case for auto bumps is more for contract instances + +[27:00] and contract Wasm. Particularly Wasm is a difficult one, because you can have a single Wasm blob that many different contract instances use, and the question is: who pays for that Wasm blob? And so essentially, by having, like, this auto bump feature, every user who touches it is required to pay a little bit, and you can essentially, like, share the cost of
You know this contract and this contract instance or this contract code in this contract instance, among all the users who are using it, and so I think the other bump feature was specifically for this kind of shared resources, that there is no clear owner, for instance like USDA. It'd be kind, of a crappy interface. If you know 10 000 people used it a day, but there was no Auto bump. And then the 10 000 in first person had to go pay rent because it got archived or something, and he was just the. Or they were just the Unlucky, user that drew the Short Straw, and so I think there definitely still is a. pretty strong use case for auto bumps, for this sort of different entry types, But I agree for something like a. + +[28:00] But I agree for something like a. but I agree for something like an adapter a wallet. You would still need You know this manual operation in. addition to audit bumps, oh, oh, would you recommend a ledger entry that? is a user specific like a balance or an? LP position, to be Auto jumpable or not Auto jumpable, I mean, I think it just depends on the contract. implementation. Furthermore, I don't think it needs to be Auto jumpable. The same way that contract instances and stuff do But I don't see like a downside, I think. the general thinking is the. The kind of design model I had in my head is that extending a lifetime, should never be negative, That like this is why it's not good. for security cases. Because anyone can extend any lifetime such that it should always be a positive action. It cannot be a negative action. And so I think, You know, I think there were talks last. + +[29:00] You know, I think there were talks last. time of exposing an Autobahn flag and I think this probably makes the most. sense: that you know contract instances, and contract wise in code You don't have this option. Whenever you deploy it, it has. It must be Auto. bumped. But then for any other key you create. I think you probably just give create. 
Furthermore, I think you probably just give the option too, or should be just Because there's also like I mean you, could also say that the contract instances and contract code are Auto bumped and nothing else is. but then you can kind of get into this sticky, situation where there are still contract types that have shared States. So for instance, if you can think like a a, like a DEX right, and there's like this asset pair that start as an entry, and many different people are like viewing or trading that asset pair. you're viewing or trading that asset pair, you wouldn't want one individual to be stuck, with the bill. And so I think that there are still, I still think Auto bump is an are still, I still think Auto bump is a powerful primitive and I think it should be powerful primitive, and I think it should probably be enabled by default, just because it's the safest route, But I think you know having a but I think you know having an option to turn it off, especially for personal State, like balances, is probably + +[30:00] personal State like balances is probably a good idea. And so, now that I have a good idea, and so, now that I've talked for a little bit, I think the answer your question. I would probably answer your question. Furthermore, I would probably say for something like a token balance. Furthermore, I say for something like a token balance, I would probably say no to the auto bump, Behavior, just because it's something that will probably like the lifetime of which will be explicitly managed by a wallet. Because I, I agree it's kind of needed, The one thing that maybe, like I'm thinking about, there, is that So for the first version we can do you, know something like that, right, like I would imagine that maybe in the future we would want to have, like, maybe, something that, that kind of Auto Tunes over time, Because, like in the example of, like the you have like a very you know active, contract. 
The Wasm is basically going to be used multiple times per ledger, and you end up with really reaching your + +[31:00] limit fairly quickly, and then, when the limit is reached, you actually are backed into that situation where, you know, some users are going to bump and some are not going to bump, like basically the first one in the ledger, all right, and the bumping, and then, yeah. And also, what if the blob is fairly large? Maybe that auto-bump ends up adding quite a bit of cost to each individual transaction, if it's, you know, bumping for a good number of ledgers, let's say 100 ledgers or something. Something to keep in mind. Garden, there is a question about the mechanics of auto-bump from Paul: can you expand a bit on when auto-bump actually occurs? Yeah. So the current strategy is that auto-bump occurs on all access, which is + +[32:00] both read and write access. How this works under the hood is that, you know, in addition to having the expiration ledger stored in the ledger entry, we also have this kind of shell entry type that is used for read-only access. So for instance, your Wasm blob is only accessed read-only, and essentially, to modify a ledger entry in the bucket list, you have to rewrite the entire entry at the top-level bucket. And so if we were to modify the expiration ledger directly in the ledger entry, you'd have to rewrite the entire Wasm blob. So to avoid this, we have this kind of shim entry type, which is just an expiration ledger extension, and so this entry is very small. It's literally just a key and then the new expiration ledger, and so we use this entry type whenever we want to bump a read-only entry.
And so, in the Wasm use case, even + +[33:00] though you're only reading the Wasm, because we are auto-bumping, you do have to do a small write, but that write is very small; it's the minimal-size write you can do. And so we are implicitly turning every read into a read-write, but we're not rewriting the entire entry. We're reading the entire entry and then writing a very small entry with the new expiration ledger, and so under the hood, that's how we efficiently implement rent bumps for both reads and writes. So I do want to touch on something that you mentioned before, which is the question of whether bumping in general should be a flag. It does feel like, you know, last week we talked a bit about, you know, various oracle usage + +[34:00] patterns, and, you know, we got to a conclusion where sometimes the contract developer would want to limit, or give an upper boundary to, when a ledger entry should exist, and making it non-bumpable is a very easy way to do that. The question is: does that over-complicate the system? Well, so I guess we have two different questions here. I think we have the question of: does the entry auto-bump, and then does the entry allow bumps? And so I think, from an implementation standpoint, these would be very easy to implement. We could just, you know, throw a flags field on the ledger entry and define a couple of flags. I think the question is, is this, you know, making the user
experience too complex? And so what this would look like is, I + +[35:00] think this would only be applicable to temporary entries, I think. Or, okay, so let's talk about the auto-bump flag first. I think the auto-bump flag, whether this item is auto-bumped on access, could be an optional flag for both temporary and restorable entries. But I think it should be strictly enforced for contract Wasm and contract instances. Now for the "should you allow bumps at all?" flag: I think we could allow that flag, but that flag could only be used for temporary entries, and so this would be the use case where you either have a security use case, where you want this to last exactly some number of ledgers, or the oracle use case, where the thing is only valid for five minutes or whatever. And then if you set this flag, then this entry would not receive auto-bumps, and it would also not be bumpable by the manual operation, and if you tried to bump it, it would just fail or panic or something like that. Now, you would only want this flag on temporary entries, because restorable entries and unique entries should always be bumpable. + +[36:00] They don't necessarily need to always have auto-bumps, but they should always be bumpable, just because they are important information that needs to be saved, which is why they are subject to being sent to the state expiration node when they expire. And so, just because the design parameter for this entry is that it's supposed to be important live state, there should be no use cases where you wouldn't want to bump, or wouldn't want to allow a unique or restorable entry to be bumpable. And so I think that is the most technically complete kind of interface; it's very, you know, feasible from a core perspective. The only question is if that's too much complexity for the end user.
I think that, as you described this, If it creates more Divergence between, temporary entries and restorable entries, then then then, probably for the sake of Simplicity, I, say we shouldn't include it, especially because this is something that the contract developer can program for in. + +[37:00] Contract developer can program for in their contract, I will say if we, go this route, and say if we, I think so. So if I say if we, I think so. So if I'm understanding it correctly, we don't want Divergence, and so we'd have an autobump flag, flag, flag true, false, but we would not have the no bump flag. I think if we go this route, no bump flag. Furthermore, I think if we go this route, we need to be very clear in our. documentation that the expiration Ledger is not absolute, Because, if you know, have like a temporary entry, self-delink entry and then there's a field called expiration. Ledger. It's a reasonable assumption that The Ledger would be deleted immediately. after that ledger. And if we don't have this no bump flag, that's not the case. this is no bump flag, that's not the case. Because a malicious user, could you know, invoke the operation and bump any. temporary entry, even if autobump is disabled. And so you know, I think that's a fine interface decision to make. that all entries are jumpable, but I think we need to make that very clear, just from an UX perspective, so that we don't have security issues with, temporary entries being used improperly. + +[38:00] Any questions or comments on this? What do people think about? Should we have the bump jumpable flag? I'm just going to type a big thing, but I could actually say it I think I would vote for having. Furthermore, I think I would vote for not having an Autobahn flag, if possible, just having a fewer, configuration options for the different types of storage. Like there are two types of storage, like there's two different types of storage, and they act-different types of storage and they act differently. 
I think that's fine too differently. Furthermore, I think that's fine to explain and understand, But if you have to say okay, there's. But if you have to say okay, there are restorable entries and there's temporary restorable entries and there are temporary entries, and they act in this way unless you enable auto bump on them, and then restore volunteers. Like you know what I mean. There's like four different means. There's like four different configurations Now versus just two So, Paul, are you talking about the? jumpable flag or the auto jumpable flag? Sorry, yes, sorry, jumpable, okay. + +[39:00] Yeah, I think that makes sense to me. It's: just not everything is bumped, but just so I understand. Are we still? interested in the Autobahn flag or do interest in the Autobahn flag, or do we also want to Nick say that, and just, say: everything I bump as well. I definitely don't think that everything, should be Auto jumpable by default, Yeah, it does sound like for from a. completeness perspective, having both of these flags kind of like covers, most use cases. I mean one thing we can do us. I mean one thing we can do is just like. If we don't want to expose the jumpable flag. Now we can just Define it. in the Dr and defined in core, just not exposing the SDK. and then turn that on V2 if we want to, you know, have a more complicated UX. Matrix for storage, That would at least give us future. + +[40:00] That would at least give us future, proofing, or like I think, actually, if we're proofing, or like I think actually, if we just just just if is we were defining Auto bump anyway, then we need to have a Flags field, for the album flag, and so we can easily extend it later, And so I think you know the jumpable, versus no jumpable. That could be a V2 feature, feature, feature. But I think, unless there's like a bit I think, unless there's like a strong, strong one for it now, we can be just strong, strong one for it now. 
We can just leave that off for now and just have the autobump flag. I'm not sure that the Autobahn, really benefit the network It might benefit the ease of use, but not necessarily the network itself. Right, sure, I guess the are having more. more IO churn for sure, I guess the there didn't seem to be a. Furthermore, I guess the there didn't seem to be a great solution for the contract instance, And contract was in the case without, Autobahn. So I think you know it for it. + +[41:00] Autobahn, so I think you know it for it: might be useful to say, maybe like, I think those two entries need something. Furthermore, I think those two entries need something like Auto bump or need some way of essentially like automatically, or pooling rent together in some way, because that was kind of the, the, the. thing we were trying to think about with the contract Wasm is okay. You have like four or five different instances, that all have the same was and back end, how do you equitably and equally share the? load when it comes to expiration ledgers? I, I think that my, my, the direction that I'm thinking about, is that, if someone was uploading a contract, I want to see his hotels' involvement in. Furthermore, I want to see his hotels' involvement in the future on maintaining that, and not so much on leaving it and assuming that it will be funded by + +[42:00] assuming that it will be funded. By someone else like I, I want to see like an activity right on that contract from the owner, and not by someone else that might be inherently using it, Oh, fishes that contracts don't have an. owner like who doesn't can be reported. by anyone, and you pay anyone So you know if you have a the creditable. Implementation that people are using, There is no clear owner. Yes, someone. might have written in the code but anyone can deploy it and yeah, it's kind of hard to track to. 
anyone and demand you know maintenance from someone, I mean you could expect it, but it's, like the Canada universally demand this. + +[43:00] Like the Canada universally demand this, Yeah, I feel like there's kind of the. The concept of like a library contract is definitely a big thing on Ethereum and so it feels like an unfair expectation. to write something like a DAP, Implementation that has you know that's that's more of a library function and expect the library owner to be the sole, payer for that, even though other contract instances are using that. Wiseman, making money off the Wasm it just seems weird that if five you know, different contract instances are using and profiting of Wasm that only the know entity that originally deployed the Wasm should be on the hook for paying. for it. So I would argue that if someone is, using something for free, then he shouldn't, Instead, you should either pay the author right and generate money right out of that right, or basically copy it in from now on, he would be the owner of that. + +[44:00] Now on, he would be the owner of that. piece, piece, piece. What do you mean by copy, though, I feel? like that's why it works for Network, Perspective, because now you have like five identical copies of the same ones, Yeah, the current design is exclusively- like encouraging sharing the code, because we don't want to store it, Tens or hundreds of duplicate version, drops, because it's the biggest lecture entries by far. So you don't want to encourage code to paste. And you know fragment system or anything and again, wait, I think I went around here, but in DT. for all like a lot of the things. Furthermore, I for all like a lot of the things I implemented by proxy patterns and Stuff Etc. again, a lot of instances are referring to, say, another single contract, instance and you know you do not deploy your own like. copy of unis love, and you do not maintain it. Instead of it, you can just refer to a proxied instance of unison. 
That is yes. + +[45:00] Proxied instance of unison, that is yes. Getting updated by someone, probably. But I don't think it's fair to say that a Do. Your Own Thing, Furthermore, I know I would be happy to see some. incentive model Incorporated here in. is part of that. But you know, maybe that's something that we need to. You know, like think separately of the countries. Yeah, I think we, we should allow the. We should allow contracts to be detached, from a concept of an owner, You know, for a lot of legal reasons. You know for, for a lot of legal reasons, this is better for some contracts. You, know, like the uni swap contracts are not. upgradable, they don't have an owner. + +[46:00] Upgradable they don't have an owner, and that's the way they want it like It exists like that for a reason right. So I don't think we should, kind of like own over, emphasize this concept of an owner. Aaron, do you think it would be? reasonable to constrain Auto bump, too reasonable to constrain Auto bump to these entries, which are the ones which are going to benefit from the most, so not all ledger entry is not contract. Data, data, data, but just the contract code and the. contract instances. contract instances-yeah, I think contract code contract. Yeah, I think contract code, contract instances for sure, But I think there's also the reason. why I wanted to Define autobump. Is I think there's also, areas where contract data should be Auto, bumped, bumped, bumped, so like, for instance, like if you have a tax, tax, tax, and then there are like entries on, the Decks that many users of the DEX use, the Audubon feature is kind of just. so that these entries are kind of like, paid for by all users instead of you. + +[47:00] Paid for by all users instead of you know one unlucky user. So I think there's still there are. definitely use cases where you'd want, something like an autobump primitive for contract data, However, I could definitely see a use. 
case where this is not the default, as a flag where contract instances and contract Wasm receive bumps. by requirement and then data defaults to false. But you can turn it on if you want, want, want, because I still think there are, definitely use cases where you'd want something like this for contract data, I don't know if it's a common case, but there definitely are significant cases. I think it's a very common case, like effectively everything in a, contract that is like a global variable or Global state that doesn't, specifically adhere to an account, like you know, like in a liquidity pool. the actual like pool values are global. and, and, and need to be bumped, We have been discussing this for some. + +[48:00] We have been discussing this for some. time. What about explicitly change the data to the contract instance? Like do we anticipate? some short case where you know the data, is kind of global, but it's not once? Per contract or something. If not, then maybe we just for the small Global state of the contract in the instant century and then you know it's subject to the same. Ultra bomb. Obviously, because if you think about it, it's kind of a part of the contract. That's another consideration here. If that's another consideration. Here I mean, I guess the question is how monolithic do we want to be right? Because the issue with that is that, the so say, the advantage is that, like if you have like a a contract instance, that's like has a very large amount, Global state, every call must bump all that state, whereas like I said, for instance, where the liquid equals, so there's like a poor implementation that has like 10. 
+ +[49:00] Implementation that has like 10 different, like asset swaps or something And so, even if you only access one, you have to bump the other nine implicitly, whereas, whereas, whereas, if you don't title, and you keep, everything individual, then you only have to bump the entries you actually touch, Now I don't know like in practice, contracts might be small enough, that this isn't really a big issue. It definitely does simplify some things and decreases our right amplification, as well, because you'd only need to write at most one bump per contract instance. So this effectively goes. sorry, go ahead, I think it's difficult to distinguish, between Global and individual, because like, they're definitely going to be contracts that have like nexuses, like where it's not Global, but maybe it involves like multiple participants, so, so I don't know like, where do we draw the? line between Something should be a shared cost versus. + +[50:00] Something should be a shared cost versus being an individual cost. seems too difficult to do. That Yeah, other than like the really General, cases of contract code should definitely case of contract code should definitely be a shared cost, like in for contract data. I don't think we can make that call like. Furthermore, I think the country developers do foreign, foreign, foreign LY think the safest option is just to default this to true, and then maybe expose a flag just because I feel like the benefit or the drawback to Auto bumping, something where when it shouldn't be Auto bumped is very minimal. Because the whole idea is that for the bump to be a small bump, such that if lots of users are accessing it, over time it grows. But, for instance, right, if you have a balance, it. You know and you say view, balance, and you have to pay 10 extra ledgers of, rent on the one entry. I mean that will rent on the one entry. Furthermore, I mean that will literally. Like that might be less than like an Excel. I'd be measured in strips. 
And so I feel like the, the drawback to, not having autobombs can be high if you. + +[51:00] Not having autobombs can be high if you have a shared. you know Global Entry that you forget to bump and then for the entire lifetime of that, contract you have to manually bump it. It's expensive. I feel like that's a much it's expensive. Furthermore, I feel like that's a much worse failure case than the other case. where you sometimes have to pay a small, additional fee to access some entries. Leaning into that is there is a really a. leaning into that is there is a really big downside to not requiring Auto bump. like what's not having the option, not making configurable? Oh yeah, just like requiring it like universally, Yeah, I personally think there's. I think, yeah, I personally think there's. Furthermore, I think there's not a. Furthermore, I think there maybe is one, negative use case. We have Oracle data that should really only live for like five minutes or ten minutes, It would be really annoying if you. know, especially if this data is accessed. often in like a DEX sort of environment. I think that would be really environment. Furthermore, I think that would be really annoying if you access this thing. So much in a five-minute lifetime, such that it always lives for like six hours or + +[52:00] it always lives for like six hours, or like two weeks after, you know, even though you should, only do five minutes. So I think for like very, short-lived entries and like Oracle data, there's a strong use case outside of that. I don't see a super outside of that. Furthermore, I don't see a super strong use case, for not just repairing oil pumps, everywhere, see, see. Are you suggesting setting into true? for every type of Entry or just for restorable entries? Because, like for the Oracle use case that you're describing, Grand- wouldn't they use temporary entries for that? Yeah, yes, I think so, I think right now. We are talking about a unified interface. 
where both restorable and temporary have, the same expiration Ledger interface and both have the same Auto buttons. Yeah, I feel like contracts, put in contracts. Avoid the issue that you're describing. You know where you accidentally, continue to rebump a temporary entry. by by by, I guess, like moving on to using, like a. + +[53:00] I guess, like moving on to using like a, contract that uses a temporary for five minutes, I guess how would people be referencing that like an hour from now, well, no. so that's the issue, right. So it's that no one is referencing it now. and for now, but in that five minutes Enough people referenced it, such that it was aren't bumped so much that it will live significantly longer than five, minutes, minutes, minutes. So the issue is: no one will access it, within an hour, but it will live. You know, an hour, two hours or however long. So this I think what you're just I, guess. So, what if Auto bumping wasn't? didn't bump it a fixed amount? What if it bumped up to some CAP, I mean it does bump up to some CAP. Furthermore, I mean it does bump up to some CAP because we have, a, a maximum rent balance, for a maximum expiration Ledger due. + +[54:00] For a maximum expiration Ledger. Due to. You know issues we discussed earlier And so there is a CAP. It's just that the issue is that you know that CAP will make sense for. some entries, but doesn't make sense for others. So, for instance, like if the CAP is six months and in the extreme case you have an oracle entry That was. supposed to last five minutes. That lasts, up to the CAP of six months. That's a lot of wasted fees, okay. So it sort of feels okays. So it sort of feels: go ahead, Sorry, it's sort of so. This is the last thing I'm going to say. I think on this it sort of feels like. Auto bump. Okay, we could argue that what Obama should be configurable per entry? But it also sounds like the amount of, This thing should be Auto bumped before. 
If we're making it configurable, it's not really a binary yes or no; it's more like an amount. + +[55:00] Like, as a contract developer... the contract developer is really probably the person who's going to know best how long, if this is going to get bumped, it needs to be bumped for. Or I guess it's some combination of the contract developer and the user. But yeah, I'm just a little skeptical of allowing contracts to define, you know, access fee amounts. Right, because if a contract was like, oh, you know, even if it's not malicious, it's just a stupid design. It's like, hey, this should have six months, and it's like a, you know, a thousand... or it's like a, you know, 10 kilobyte entry that requires a six-month bump just to access. That's a pretty poor UX. Yeah, I'm like, this is just the slippery slope of configurability. Like the more I... yeah, in some ways maybe we need, you know, to Paul's point around not making things configurable, + +[56:00] maybe this is the trade-off we make: okay, temporary entries might get bumped more than what they need to. They might live around a little bit longer, but that's a trade-off for all these other reasons. Okay, so we're at time. It definitely sounds like everyone's on board with ledger expiration, moving on from rent balance. There are the questions of which flags we expose, and there is the question that Lima raised about revisiting the idea of contract-attached state, which we decided against in the context of metadata, but that wasn't... Yeah, it wasn't a landslide of + +[57:00] opinions there. So maybe it's worth rethinking about that.
So, again, it does sound like you have a lot to work, with right now in terms of Ledger, exploration, exploration, exploration. I would try to summarize this. Furthermore, I would try to summarize this: question about which kind of Flags, We've, we've just debated and and We've, we've just debated and starting a discussion around that, to give people some asynchronous time, to think about that, And Lima, if you could kind of like, resurface this idea of kind of like contract attached state, and the benefits of that both in this context and in others. I, both in this context and in others. Furthermore, I think that could be beneficial. Garden, is there anything else? You know that sounds good. Okay, Lima, I saw you unmuted for a sec. there. Yeah, I just want to just say that. + +[58:00] There, yeah, I just want to just say that yeah, when I have some time, oh right, I guess. Is that my nature with the contract data? You said through the time yeah, so I. I don't know if you have, enough time to get it shipped into the one. Even everyone is on board, but yeah, I need to think a bit more about this. Awesome, awesome, okay. Thank you everyone, it's been a great session. See you all next week. + +
diff --git a/meetings/README.md b/meetings/README.md new file mode 100644 index 0000000000..71eda6c4c0 --- /dev/null +++ b/meetings/README.md @@ -0,0 +1,75 @@ +# Developer Meetings + +This folder contains the meeting posts and a helper script to generate new +meeting pages from a YouTube video ID (11 characters). + +## Script + +`meetings/new-meeting.py` + +What it does: + +- Downloads YouTube captions (VTT) via `yt_dlp`. +- Builds a transcript with 1-minute blocks. +- Adds punctuation and spellcheck. +- Creates a new `meetings/YYYY-MM-DD.mdx` page with front-matter and YouTube + embed. + +What it doesn't do: + +- Draft a perfect description. +- Add a helpful resources section. +- Pull the actual meeting date when different from video upload date. +- Handle days when there are multiple meetings to put together. + +### Requirements + +Python 3.9+ and a virtual environment. + +### Setup + +```bash +python -m venv .venv +.\.venv\Scripts\activate +python -m pip install yt_dlp webvtt-py deepmultilingualpunctuation language_tool_python +``` + +Spellcheck requires Java (used by `language_tool_python`). If Java isn’t +installed, spellcheck will be skipped with a warning. + +Java install (any platform): + +- Install a [recent JDK](https://www.oracle.com/java/technologies/downloads) + (Java 17+ recommended). +- Make sure `java` is on your PATH (`java -version` should work). + +### Basic usage + +Run and answer the prompts: + +```bash +python meetings/new-meeting.py +``` + + + +Or pass values directly: + +```bash +python meetings/new-meeting.py \ + --video VIDEO_ID \ + --authors name-slug \ + --tags developer +``` + +This will: + +- Download captions into memory (deleted without `--keep-vtt`) +- Create `meetings/YYYY-MM-DD.mdx` using the YouTube upload date + +## Notes + +- `--authors` must match IDs in `meetings/authors.yml`. +- Use `--no-create-page` to skip MDX generation and only export captions. 
#!/usr/bin/env python3
"""Generate a meeting page (``meetings/YYYY-MM-DD.mdx``) from a YouTube video.

Pipeline: download the captions with ``yt_dlp``, group the cues into
fixed-size time blocks, optionally restore punctuation and spellcheck each
block, then write an MDX page holding front matter, the video embed, heuristic
key points, CAP/SEP links and the transcript.

Output files:

- ``--save-txt`` also writes ``<out>/VIDEO_ID.<lang>.txt`` (with the defaults:
  ``transcripts_out/VIDEO_ID.en.txt``).
- ``--keep-vtt`` keeps the raw closed-caption (VTT) files; without it they are
  removed once the video has been processed.
- ``--no-create-page`` skips the MDX page and only exports the plain text.
"""
import argparse
import datetime as dt
import json
import pathlib
import re
import subprocess
import sys
from typing import Any, Dict, Iterator, List, Optional, Tuple

try:
    import webvtt
except ImportError:
    # Reported by main() (and by the VTT readers) instead of failing at import
    # time, so that ``--help`` still works without the dependency installed.
    webvtt = None

# Lazily created singletons / failure latches for the optional text helpers.
PUNCTUATION_MODEL = None
PUNCTUATION_FAILED = False
LANGUAGE_TOOL = None
SPELLCHECK_FAILED = False

# Words ignored when scoring sentences and when picking "summary items".
STOPWORDS = {
    "a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "from", "has", "have", "he", "her",
    "his", "how", "i", "if", "in", "is", "it", "its", "me", "my", "not", "of", "on", "or", "our",
    "she", "so", "that", "the", "their", "them", "they", "this", "to", "up", "us", "we", "were",
    "what", "when", "where", "which", "who", "will", "with", "you", "your",
}
# Domain terms that earn a sentence a scoring bonus in summarizeKeyPoints().
SUMMARY_KEYWORDS = {
    "cap", "caps", "protocol", "upgrade", "validator", "ledger", "freeze", "ttl", "extension",
    "host", "function", "functions", "address", "strkey", "soroban", "muxedaddress",
    "network", "config", "vote", "voting", "contract", "contracts", "sdk",
}
# Names that cleanCaptionText() canonicalizes; buildMdx() must not lower-case them.
_KEEP_CAPITALIZED = {"Stellar", "Soroban", "OpenZeppelin", "Protocol"}

youtubeIdRE = re.compile(r"(?:v=|\/)([0-9A-Za-z_-]{11})(?:\?|&|\/|$)")
vttTimeRE = re.compile(r"^(\d{2}):(\d{2}):(\d{2})\.(\d{3})$")


def _requireWebvtt() -> None:
    """Raise a readable error when the ``webvtt`` package is unavailable."""
    if webvtt is None:
        raise RuntimeError("webvtt-py is required (python -m pip install webvtt-py)")


def extractVideoId(urlOrId: str) -> Optional[str]:
    """Return the 11-character video ID found in ``urlOrId``, or None.

    Accepts either a bare ID or a URL in which the ID follows ``v=`` or ``/``.
    """
    s = urlOrId.strip()
    if len(s) == 11 and re.fullmatch(r"[0-9A-Za-z_-]{11}", s):
        return s
    m = youtubeIdRE.search(s)
    return m.group(1) if m else None


def readList(path: pathlib.Path) -> Iterator[str]:
    """Yield the non-empty lines of ``path`` that do not start with ``#``."""
    with path.open(encoding="utf-8") as f:
        for line in f:
            clean = line.strip()
            if clean and not clean.startswith("#"):
                yield clean


def vttToText(vttPath: pathlib.Path) -> str:
    """Return the cue texts of a VTT file, one cue per line, newline-terminated."""
    _requireWebvtt()
    parts = []
    for cue in webvtt.read(str(vttPath)):
        parts.append(cue.text.strip())
    return "\n".join(filter(None, parts)) + "\n"


def parseVttTime(ts: str) -> float:
    """Convert ``HH:MM:SS.mmm`` to seconds; any other format yields 0.0."""
    m = vttTimeRE.match(ts)
    if not m:
        return 0.0
    hours, minutes, seconds, millis = (int(x) for x in m.groups())
    return hours * 3600 + minutes * 60 + seconds + (millis / 1000.0)


def cleanCaptionText(text: str) -> str:
    """Strip markup and filler words from one cue and canonicalize known names.

    Removes ``<...>`` tags, filler interjections (um, uh, ...), normalizes
    CAP/SEP/SLP references and the spellings of Protocol N, Stellar,
    OpenZeppelin and Soroban, then collapses all whitespace to single spaces.
    """
    text = re.sub(r"<[^>]+>", "", text)
    # NOTE(review): presumably targets a mangled ">>" speaker marker - confirm.
    text = re.sub(r"AGT;+", "", text)
    text = re.sub(r"\b(?:um+|uh+|erm+|hmm+|mm+)\b[,.]?", "", text, flags=re.IGNORECASE)
    text = normalizeProtocolAcronyms(text)
    text = re.sub(r"\bprotocol\s+(\d+)\b", lambda m: f"Protocol {m.group(1)}", text, flags=re.IGNORECASE)
    text = re.sub(r"\bstella?r\b", "Stellar", text, flags=re.IGNORECASE)
    text = re.sub(r"\bopen\s*(?:zeppelin|zepplin|zepelin|rubin)\b", "OpenZeppelin", text, flags=re.IGNORECASE)
    text = re.sub(r"\b(sorond|soron|soran|soroban|orb[áa]n|soro?b[oa]n)\b", "Soroban", text, flags=re.IGNORECASE)
    text = text.replace("\n", " ")
    text = re.sub(r"\s+", " ", text)
    return text.strip()


def normalizeProtocolAcronyms(text: str) -> str:
    """Upper-case CAP/SEP/SLP and rewrite numbered references as ``CAP-21``.

    Also turns the mis-hearing ``cat <number>`` into ``CAP-<number>``. Leading
    zeros in the number are dropped (``sep-0010`` -> ``SEP-10``).
    """
    # Plurals: "caps" -> "CAPs".
    text = re.sub(
        r"\b(cap|sep|slp)s\b",
        lambda m: f"{m.group(1).upper()}s",
        text,
        flags=re.IGNORECASE,
    )

    # A possessive that is really a plural ("cap's are ...") -> "CAPs".
    text = re.sub(
        r"\b(cap|sep|slp)['’]s\b(?=\s+(?:aka|and|what|these|those|themselves|you|we|they|are|were|have|include|includes|suggest|suggests|allow|allows|make|makes|work|works|change|changes|can|could|should|would|may|might|must)\b)",
        lambda m: f"{m.group(1).upper()}s",
        text,
        flags=re.IGNORECASE,
    )

    # Bare acronym.
    text = re.sub(
        r"\b(cap|sep|slp)\b",
        lambda m: m.group(1).upper(),
        text,
        flags=re.IGNORECASE,
    )

    # "CAP 21" / "CAP - 0021" -> "CAP-21".
    text = re.sub(
        r"\b(CAP|SEP|SLP)\s*-?\s*(\d{1,4})\b",
        lambda m: f"{m.group(1).upper()}-{int(m.group(2))}",
        text,
        flags=re.IGNORECASE,
    )

    text = re.sub(
        r"\bcat\s*-?\s*(\d{1,4})\b",
        lambda m: f"CAP-{int(m.group(1))}",
        text,
        flags=re.IGNORECASE,
    )

    return text


def formatTimestamp(seconds: int) -> str:
    """Format ``seconds`` as ``MM:SS``; minutes are not wrapped at 60."""
    mins = seconds // 60
    secs = seconds % 60
    return f"{mins:02d}:{secs:02d}"


def yamlEscape(value: str) -> str:
    """Escape backslashes and double quotes for a double-quoted YAML scalar."""
    return value.replace("\\", "\\\\").replace("\"", "\\\"")


def punctuateText(text: str, enabled: bool) -> str:
    """Restore punctuation with ``deepmultilingualpunctuation`` when available.

    Returns ``text`` unchanged when disabled or after any earlier failure; the
    first failure prints one warning and disables punctuation for the run.
    """
    global PUNCTUATION_MODEL, PUNCTUATION_FAILED

    if not enabled or PUNCTUATION_FAILED:
        return text

    try:
        from deepmultilingualpunctuation import PunctuationModel  # type: ignore
    except Exception:
        print("warning: deepmultilingualpunctuation not installed; skipping punctuation", file=sys.stderr)
        PUNCTUATION_FAILED = True
        return text

    if PUNCTUATION_MODEL is None:
        try:
            PUNCTUATION_MODEL = PunctuationModel()
        except Exception as exc:
            print(f"warning: punctuation model failed; skipping punctuation ({exc})", file=sys.stderr)
            PUNCTUATION_FAILED = True
            return text

    try:
        return PUNCTUATION_MODEL.restore_punctuation(text)
    except Exception as exc:
        print(f"warning: punctuation model failed; skipping punctuation ({exc})", file=sys.stderr)
        PUNCTUATION_FAILED = True
        return text


def spellcheckText(text: str, enabled: bool) -> str:
    """Spellcheck ``text`` with ``language_tool_python`` when available.

    Returns ``text`` unchanged when disabled, when it is longer than 12000
    characters, or after a failure. As in punctuateText(), the first failure
    (missing package, failed start-up, failed correction) prints one warning
    and disables spellchecking for the rest of the run, so the warning and the
    start-up attempt are not repeated for every transcript block.
    """
    global LANGUAGE_TOOL, SPELLCHECK_FAILED

    if not enabled or SPELLCHECK_FAILED:
        return text
    try:
        import language_tool_python  # type: ignore
    except Exception:
        print("warning: language_tool_python not installed; skipping spellcheck", file=sys.stderr)
        SPELLCHECK_FAILED = True
        return text
    if len(text) > 12000:
        # Only this block is skipped; shorter blocks are still checked.
        print("warning: text too long for spellcheck; skipping", file=sys.stderr)
        return text
    if LANGUAGE_TOOL is None:
        try:
            LANGUAGE_TOOL = language_tool_python.LanguageTool("en-US")
        except Exception as exc:
            print(f"warning: spellcheck init failed; skipping spellcheck ({exc})", file=sys.stderr)
            SPELLCHECK_FAILED = True
            return text
    try:
        return LANGUAGE_TOOL.correct(text)
    except Exception as exc:
        print(f"warning: spellcheck failed; skipping spellcheck ({exc})", file=sys.stderr)
        SPELLCHECK_FAILED = True
        return text


def dedupeRepeatedPhrases(text: str) -> str:
    """Collapse immediately repeated phrases of 2 to 12 words.

    Words are compared case-insensitively with surrounding punctuation
    ignored. Texts shorter than six words are returned untouched, and at most
    three passes are made over the text.
    """
    tokens = text.split()
    if len(tokens) < 6:
        return text
    maxWindow = 12
    maxPasses = 3
    for _ in range(maxPasses):
        i = 0
        changed = False
        while i < len(tokens):
            window = min(maxWindow, (len(tokens) - i) // 2)
            matched = False
            # Longest phrase first, so a long repeat is removed in one step.
            for n in range(window, 1, -1):
                a = tokens[i:i + n]
                b = tokens[i + n:i + 2 * n]
                if len(b) < n:
                    continue
                normA = [re.sub(r"^\W+|\W+$", "", t).lower() for t in a]
                normB = [re.sub(r"^\W+|\W+$", "", t).lower() for t in b]
                if normA == normB and any(normA):
                    del tokens[i + n:i + 2 * n]
                    changed = True
                    matched = True
                    break
            # Stay on the same index after a removal to catch triple repeats.
            if not matched:
                i += 1
        if not changed:
            break
    return " ".join(tokens)


def splitSentences(text: str) -> List[str]:
    """Split on whitespace that follows ``.``, ``!`` or ``?``."""
    parts = re.split(r"(?<=[.!?])\s+", text.strip())
    return [p.strip() for p in parts if p.strip()]


def tokenize(text: str) -> List[str]:
    """Return lower-cased words longer than two characters, minus STOPWORDS."""
    return [
        w.lower()
        for w in re.findall(r"[A-Za-z0-9']+", text)
        if len(w) > 2 and w.lower() not in STOPWORDS
    ]


def normalizeSentence(sentence: str) -> str:
    """Collapse whitespace, ensure end punctuation, capitalize the first letter."""
    sentence = re.sub(r"\s+", " ", sentence).strip()
    if sentence and sentence[-1] not in ".!?":
        sentence += "."
    return sentence[0].upper() + sentence[1:] if sentence else sentence


def extractSummaryItems(sentence: str) -> List[str]:
    """Pick up to seven distinct "topic" tokens from ``sentence``.

    A token qualifies when it is a CAP reference, an all-caps word longer than
    two characters, a SUMMARY_KEYWORDS entry, or a capitalized non-stopword.
    Order of first appearance is kept; duplicates are compared case-insensitively.
    """
    items: List[str] = []
    for token in re.findall(r"[A-Za-z0-9-]+", sentence):
        lower = token.lower()
        if re.fullmatch(r"CAP-?\d+", token, flags=re.IGNORECASE):
            items.append(token.upper())
            continue
        if token.isupper() and len(token) > 2:
            items.append(token)
            continue
        if lower in SUMMARY_KEYWORDS:
            items.append(token)
            continue
        if token[0].isupper() and lower not in STOPWORDS:
            items.append(token)
    seen = set()
    deduped: List[str] = []
    for item in items:
        key = item.lower()
        if key in seen:
            continue
        seen.add(key)
        deduped.append(item)
    return deduped[:7]


def synthesizePoint(sentence: str) -> str:
    """Turn a sentence into a bullet: its topic items, else the sentence itself."""
    items = extractSummaryItems(sentence)
    if items:
        return f"Discussion focused on {', '.join(items)}."
    return normalizeSentence(sentence)


def extractResourceLinks(blocks: List[Tuple[int, str]]) -> List[Tuple[str, str]]:
    """Return ``(label, url)`` for each distinct CAP/SEP mentioned in ``blocks``.

    URLs point at the stellar-protocol repository with a zero-padded 4-digit
    number; results are in order of first mention.
    """
    matches: List[Tuple[str, str]] = []
    seen = set()
    for _, block in blocks:
        for match in re.finditer(r"\b(CAP|SEP)[- ]?(\d{1,4})\b", block, flags=re.IGNORECASE):
            kind = match.group(1).upper()
            number = int(match.group(2))
            key = (kind, number)
            if key in seen:
                continue
            seen.add(key)
            slug = f"{number:04d}"
            if kind == "CAP":
                url = f"https://github.com/stellar/stellar-protocol/blob/master/core/cap-{slug}.md"
            else:
                url = f"https://github.com/stellar/stellar-protocol/blob/master/ecosystem/sep-{slug}.md"
            matches.append((f"{kind}-{number}", url))
    return matches


def summarizeKeyPoints(blocks: List[Tuple[int, str]], maxPoints: int) -> List[str]:
    """Select up to ``maxPoints`` bullets from the transcript by word frequency.

    Sentences are scored by the mean corpus frequency of their tokens plus 0.6
    per SUMMARY_KEYWORDS token. Sentences that are too short or long, or that
    contain two or more filler words, are not considered. Candidates whose
    token sets overlap an already picked one by more than 60% (Jaccard) are
    skipped. If nothing qualifies, the first sentence is returned.
    """
    if not blocks or maxPoints <= 0:
        return []

    text = " ".join(block for _, block in blocks)
    sentences = splitSentences(text)
    if not sentences:
        return []

    freqs: Dict[str, int] = {}
    for sentence in sentences:
        for token in tokenize(sentence):
            freqs[token] = freqs.get(token, 0) + 1

    scored: List[Tuple[int, float, str, set]] = []
    for idx, sentence in enumerate(sentences):
        lowered = sentence.lower()
        fillerHits = len(re.findall(r"\b(yeah|okay|ok|um|uh|like|you know|sort of|kind of)\b", lowered))
        tokens = tokenize(sentence)
        if len(tokens) < 5 or len(tokens) > 30:
            continue
        if fillerHits >= 2:
            continue
        if len(sentence) < 50:
            continue
        score = sum(freqs.get(t, 0) for t in tokens) / max(len(tokens), 1)
        keywordBonus = sum(1 for t in tokens if t in SUMMARY_KEYWORDS) * 0.6
        score += keywordBonus
        scored.append((idx, score, sentence, set(tokens)))

    if not scored:
        fallback = normalizeSentence(sentences[0])
        return [fallback] if fallback else []

    # Highest score first; ties keep transcript order.
    scored.sort(key=lambda x: (-x[1], x[0]))
    picked: List[str] = []
    pickedSets: List[set] = []
    pickedNorm: set = set()
    for _, _, sentence, tokenSet in scored:
        if len(picked) >= maxPoints:
            break
        tooSimilar = False
        for prevSet in pickedSets:
            if not prevSet:
                continue
            overlap = len(prevSet & tokenSet) / max(len(prevSet | tokenSet), 1)
            if overlap > 0.6:
                tooSimilar = True
                break
        if tooSimilar:
            continue
        synthesized = synthesizePoint(sentence)
        synthNorm = re.sub(r"\s+", " ", synthesized).strip().lower()
        if synthNorm in pickedNorm:
            continue
        picked.append(synthesized)
        pickedNorm.add(synthNorm)
        pickedSets.append(tokenSet)
    return picked


def vttToMinuteBlocks(vttPath: pathlib.Path, blockSeconds: int, punctuate: bool, spellcheck: bool) -> List[Tuple[int, str]]:
    """Group the cues of a VTT file into ``blockSeconds``-long text blocks.

    Returns ``(startOffsetSeconds, text)`` tuples sorted by offset. Consecutive
    identical cues inside a block and consecutive identical blocks are dropped.

    Raises:
        ValueError: if ``blockSeconds`` is not positive.
        RuntimeError: if the ``webvtt`` package is not installed.
    """
    if blockSeconds <= 0:
        raise ValueError("blockSeconds must be a positive number of seconds")
    _requireWebvtt()

    buckets: Dict[int, List[str]] = {}
    for cue in webvtt.read(str(vttPath)):
        startS = parseVttTime(cue.start)
        minuteBucket = int(startS // blockSeconds) * blockSeconds
        clean = cleanCaptionText(cue.text)
        if not clean:
            continue
        items = buckets.setdefault(minuteBucket, [])
        if not items or items[-1] != clean:
            items.append(clean)

    blocks: List[Tuple[int, str]] = []
    lastNorm: Optional[str] = None
    for bucket in sorted(buckets.keys()):
        joined = " ".join(buckets[bucket]).strip()
        if not joined:
            continue
        joined = punctuateText(joined, punctuate)
        joined = spellcheckText(joined, spellcheck)
        # Re-applied because the two steps above may alter the acronyms again.
        joined = normalizeProtocolAcronyms(joined)
        joined = dedupeRepeatedPhrases(joined)
        norm = re.sub(r"\s+", " ", joined).strip().lower()
        if norm and norm == lastNorm:
            continue
        blocks.append((bucket, joined))
        lastNorm = norm
    return blocks


def _ytDlpOptions(args: argparse.Namespace) -> List[str]:
    """Return the optional yt-dlp flags shared by the metadata and caption calls."""
    opts: List[str] = []
    for attr, flag in (
        ("cookies", "--cookies"),
        ("remoteComponents", "--remote-components"),
        ("jsRuntime", "--js-runtimes"),
        ("impersonate", "--impersonate"),
    ):
        value = getattr(args, attr, None)
        if value:
            opts.extend([flag, value])
    return opts


def fetchMetadata(videoId: str, args: argparse.Namespace) -> Dict[str, Any]:
    """Return the yt-dlp JSON metadata of ``videoId``, or ``{}`` on any failure."""
    cmd = [
        sys.executable,
        "-m",
        "yt_dlp",
        "--skip-download",
        "--dump-single-json",
        f"https://www.youtube.com/watch?v={videoId}",
    ]
    cmd.extend(_ytDlpOptions(args))
    proc = subprocess.run(cmd, capture_output=True, text=True)
    if proc.returncode:
        print(f"{videoId}: yt-dlp metadata failed ({proc.returncode})\n{proc.stderr}", file=sys.stderr)
        return {}
    try:
        data = json.loads(proc.stdout)
    except ValueError:
        print(f"{videoId}: yt-dlp metadata was not valid JSON; ignoring it", file=sys.stderr)
        return {}
    return data if isinstance(data, dict) else {}


def _lowerFirstWord(text: str) -> str:
    """Lower-case the first letter unless the first word must stay capitalized.

    The pronoun "I" (and its contractions), all-caps acronyms and the names in
    _KEEP_CAPITALIZED are left alone.
    """
    words = text.split(None, 1)
    if not words:
        return text
    core = re.sub(r"^\W+|\W+$", "", words[0])
    if not core:
        return text
    if core == "I" or core.startswith(("I'", "I’")):
        return text
    if len(core) > 1 and core.isupper():
        return text
    if core in _KEEP_CAPITALIZED:
        return text
    return text[0].lower() + text[1:]


def buildMdx(
    videoId: str,
    title: str,
    description: str,
    authors: List[str],
    tags: List[str],
    blocks: List[Tuple[int, str]],
    summaryPoints: List[str],
    resourceLinks: List[Tuple[str, str]],
) -> str:
    """Render the complete MDX page as a string.

    The page holds the YAML front matter, the YouTube embed, a "Key Points"
    list, a "Resources" list and the timestamped transcript inside a
    collapsible ``<details>`` element. Both section headings are always
    emitted, even when their lists are empty. A transcript block that follows
    a block without end punctuation starts in lower case, since it continues
    the same sentence.
    """
    safeTitle = yamlEscape(title)
    safeDescription = yamlEscape(description)
    frontMatterLines = [
        "---",
        f"title: \"{safeTitle}\"",
        f"description: \"{safeDescription}\"",
        "authors:",
    ]
    for author in authors:
        frontMatterLines.append(f"  - {author}")
    frontMatterLines.append(f"tags: [{', '.join(tags)}]")
    frontMatterLines.append("---")
    frontMatter = "\n".join(frontMatterLines)

    transcriptLines = []
    prevEndedSentence = True
    for offset, text in blocks:
        cleanedText = text
        if cleanedText and not prevEndedSentence:
            cleanedText = _lowerFirstWord(cleanedText)
        transcriptLines.append(f"[{formatTimestamp(offset)}] {cleanedText}")
        transcriptLines.append("")
        prevEndedSentence = bool(re.search(r"[.!?]$", text.strip()))

    transcriptBody = "\n".join(transcriptLines).rstrip()
    summaryBlock = ""
    if summaryPoints:
        summaryLines = [f"- {point}" for point in summaryPoints] + [""]
        summaryBlock = "\n".join(summaryLines)
    resourcesBlock = ""
    if resourceLinks:
        resourceLines = [f"- [{label}]({url})" for label, url in resourceLinks]
        resourcesBlock = "\n".join(resourceLines) + "\n"
    # NOTE(review): the embed assumes the YouTube component takes an `ID`
    # prop - confirm against src/components/YouTube.
    return (
        f"{frontMatter}\n\n"
        "import YouTube from \"@site/src/components/YouTube\";\n\n"
        f"<YouTube ID=\"{videoId}\" />\n\n"
        f"### Key Points\n\n"
        f"{summaryBlock}\n\n"
        f"### Resources\n\n"
        f"{resourcesBlock}\n\n"
        "<details>\n"
        "  <summary>Video Transcript</summary>\n\n"
        f"{transcriptBody}\n\n"
        "</details>\n"
    )


def _condense(text: str, limit: int = 180) -> str:
    """Collapse all whitespace in ``text`` and cut it to ``limit`` characters."""
    return re.sub(r"\s+", " ", text.strip())[:limit]


def _resolveDate(explicit: Optional[str], uploadDate: Any) -> str:
    """Pick the page date: ``--date``, else the upload date, else today."""
    if explicit:
        return explicit
    if isinstance(uploadDate, str) and re.fullmatch(r"\d{8}", uploadDate):
        return f"{uploadDate[:4]}-{uploadDate[4:6]}-{uploadDate[6:8]}"
    return dt.date.today().isoformat()


def _downloadCaptions(videoId: str, outDir: pathlib.Path, args: argparse.Namespace) -> List[pathlib.Path]:
    """Run yt-dlp for the captions; return the VTT files found ([] on failure)."""
    cmd = [
        sys.executable,
        "-m",
        "yt_dlp",
        "--skip-download",
        "--write-subs",
        "--write-auto-subs",
        "--sub-langs",
        args.lang,
        "--sub-format",
        "vtt",
        "-o",
        str(outDir / f"{videoId}.%(ext)s"),
        f"https://www.youtube.com/watch?v={videoId}",
    ]
    cmd.extend(_ytDlpOptions(args))

    proc = subprocess.run(cmd, capture_output=True, text=True)
    if proc.returncode:
        print(f"{videoId}: yt-dlp failed ({proc.returncode})\n{proc.stderr}", file=sys.stderr)
        return []

    vttFiles = list(outDir.glob(f"{videoId}*.vtt"))
    if not vttFiles:
        print(f"{videoId}: no captions", file=sys.stderr)
    return vttFiles


def _writeMeetingPage(videoId: str, vttPath: pathlib.Path, args: argparse.Namespace) -> None:
    """Build ``<meetingsDir>/<date>.mdx`` for one video from its VTT file."""
    metadata = fetchMetadata(videoId, args)
    title = args.title or metadata.get("title") or f"Meeting {videoId}"
    description = _condense(args.description or metadata.get("description") or "")
    tags = [t.strip() for t in (args.tags or "developer").split(",") if t.strip()]
    authors = [a.strip() for a in (args.authors or "placeholder").split(",") if a.strip()]
    if not authors:
        authors = ["placeholder"]
    dateStr = _resolveDate(args.date, metadata.get("upload_date"))

    meetingsDir = pathlib.Path(args.meetingsDir)
    meetingsDir.mkdir(parents=True, exist_ok=True)
    outPath = meetingsDir / f"{dateStr}.mdx"
    # Checked before the slow punctuation/spellcheck pass, not after it.
    if outPath.exists() and not args.overwrite:
        print(f"{videoId}: {outPath} exists (use --overwrite to replace)", file=sys.stderr)
        return

    blocks = vttToMinuteBlocks(
        vttPath,
        blockSeconds=args.blockSeconds,
        punctuate=args.punctuate,
        spellcheck=args.spellcheck,
    )
    summaryPoints = [] if args.noSummary else summarizeKeyPoints(blocks, args.summaryPoints)
    resourceLinks = extractResourceLinks(blocks)
    if (not args.description) and (not description) and summaryPoints:
        description = _condense(summaryPoints[0])
    if not description:
        description = "Stellar developer meeting transcript."

    outPath.write_text(
        buildMdx(videoId, title, description, authors, tags, blocks, summaryPoints, resourceLinks),
        encoding="utf-8",
    )
    print(f"{videoId}: wrote {outPath}")


def _removeVttFiles(vttFiles: List[pathlib.Path], outDir: pathlib.Path) -> None:
    """Best-effort removal of the VTT files and of ``outDir`` if left empty."""
    for vttFile in vttFiles:
        try:
            vttFile.unlink()
        except OSError as exc:
            print(f"warning: could not remove {vttFile} ({exc})", file=sys.stderr)
    try:
        if outDir.exists() and not any(outDir.iterdir()):
            outDir.rmdir()
    except OSError:
        # A directory that cannot be removed is harmless; leave it in place.
        pass


def fetchCaptions(videoId: str, outDir: pathlib.Path, args: argparse.Namespace) -> None:
    """Download the captions of one video and produce the requested outputs.

    Writes ``<outDir>/<videoId>.<lang>.txt`` when ``--save-txt`` or
    ``--no-create-page`` is set, and the MDX page unless ``--no-create-page``
    is set. Failures are reported on stderr and the function returns normally.
    The downloaded VTT files are removed afterwards unless ``--keep-vtt`` is
    set, including when the page already exists or an error is raised.
    """
    outDir.mkdir(parents=True, exist_ok=True)
    vttFiles = _downloadCaptions(videoId, outDir, args)
    if not vttFiles:
        return

    try:
        # Several caption files can match the ID; use the most recent one.
        vttPath = max(vttFiles, key=lambda p: p.stat().st_mtime)
        if not args.createPage or args.saveTxt:
            txtPath = outDir / f"{videoId}.{args.lang}.txt"
            txtPath.write_text(vttToText(vttPath), encoding="utf-8")
            print(f"{videoId}: saved {txtPath}")

        if args.createPage:
            _writeMeetingPage(videoId, vttPath, args)
    finally:
        if not args.keepVtt:
            _removeVttFiles(vttFiles, outDir)


def _isoDateArg(value: str) -> str:
    """argparse type: accept only a valid ISO calendar date; return it normalized."""
    try:
        return dt.date.fromisoformat(value).isoformat()
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid date {value!r}; expected YYYY-MM-DD")


def _positiveInt(value: str) -> int:
    """argparse type: accept only integers greater than zero."""
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid integer {value!r}")
    if number <= 0:
        raise argparse.ArgumentTypeError(f"{value!r} must be greater than zero")
    return number


def _prompt(message: str) -> str:
    """Ask on stdin; a closed or non-interactive stdin counts as an empty answer."""
    try:
        return input(message).strip()
    except EOFError:
        return ""


def _buildParser() -> argparse.ArgumentParser:
    """Create the command-line parser."""
    parser = argparse.ArgumentParser(description="Export YouTube captions to text")
    parser.set_defaults(createPage=True, punctuate=True, spellcheck=True)
    group = parser.add_mutually_exclusive_group(required=False)
    group.add_argument("--video", help="YouTube URL or ID")
    group.add_argument("--list", help="File with one URL/ID per line")
    parser.add_argument("--lang", default="en")
    parser.add_argument("--out", default="transcripts_out")
    parser.add_argument("--cookies", help="Path to cookies.txt for authenticated captions")
    parser.add_argument("--remote-components", dest="remoteComponents")
    parser.add_argument("--js-runtime", dest="jsRuntime")
    parser.add_argument("--impersonate")
    parser.add_argument("--no-create-page", action="store_false", dest="createPage", help="Only export captions to text")
    parser.add_argument("--save-txt", action="store_true", dest="saveTxt", help="Also save a plain text transcript")
    parser.add_argument("--keep-vtt", action="store_true", dest="keepVtt", help="Keep downloaded VTT files")
    parser.add_argument("--meetings-dir", default="meetings", dest="meetingsDir")
    parser.add_argument("--title")
    parser.add_argument("--description")
    parser.add_argument("--authors", help="Comma-separated speaker slugs from meetings/authors.yml")
    parser.add_argument("--tags", help="Comma-separated tags (default: developer)")
    parser.add_argument("--date", type=_isoDateArg, help="YYYY-MM-DD; defaults to upload date")
    parser.add_argument("--overwrite", action="store_true")
    parser.add_argument("--block-seconds", type=_positiveInt, default=60, dest="blockSeconds")
    parser.add_argument("--no-punctuate", action="store_false", dest="punctuate")
    parser.add_argument("--no-spellcheck", action="store_false", dest="spellcheck")
    parser.add_argument("--summary-points", type=int, default=4, dest="summaryPoints", help="Number of summary bullets to include")
    parser.add_argument("--no-summary", action="store_true", dest="noSummary", help="Skip summary generation")
    return parser


def main(argv: Optional[List[str]] = None) -> None:
    """Command-line entry point.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.

    Prompts for the video, the speakers and the tags when they were not given
    on the command line, then processes every video in turn. Exits with an
    argparse usage error when no usable video reference is available.
    """
    parser = _buildParser()
    args = parser.parse_args(argv)

    if not args.video and not args.list:
        args.video = _prompt("YouTube video ID or URL: ") or None
        if not args.video:
            parser.error("no video given (use --video or --list)")
    if args.createPage and not args.authors:
        args.authors = _prompt("Speakers (comma-separated, from authors.yml): ") or None
    if args.createPage and not args.tags:
        args.tags = _prompt("Tags (comma-separated, default: developer): ") or "developer"

    if args.video:
        inputs = [args.video]
    else:
        try:
            inputs = list(readList(pathlib.Path(args.list)))
        except OSError as exc:
            parser.error(f"cannot read --list file ({exc})")

    videoIds: List[str] = []
    for item in inputs:
        vid = extractVideoId(item)
        if vid:
            videoIds.append(vid)
        else:
            print(f"warning: skipping unrecognized video reference: {item}", file=sys.stderr)
    if not videoIds:
        parser.error("no valid video IDs found")
    if webvtt is None:
        parser.error("webvtt-py is not installed (python -m pip install webvtt-py)")

    outDir = pathlib.Path(args.out)
    for vid in videoIds:
        fetchCaptions(vid, outDir, args)


if __name__ == "__main__":
    main()