diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..3f6d19b
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,63 @@
+# Continuous integration: lint with ruff and run the pytest suite on every
+# push to main, on every pull request, and on manual dispatch.
+name: CI
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+  workflow_dispatch:
+
+# Least privilege: every job only needs to read the repository contents.
+permissions:
+  contents: read
+
+# A push that obsoletes a previous run cancels it.
+concurrency:
+  group: ci-${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  # Static analysis only; the package itself is not installed here.
+  lint:
+    name: ruff
+    runs-on: ubuntu-latest
+    # Fail fast instead of holding a runner for the 360-minute default.
+    timeout-minutes: 10
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+          cache: pip
+          cache-dependency-path: pyproject.toml
+      - run: pip install --upgrade pip
+      - run: pip install ruff
+      - run: ruff check .
+
+  # Test suite, run once per Python version listed in the matrix.
+  test:
+    name: pytest (py${{ matrix.python }})
+    runs-on: ubuntu-latest
+    # Bound a hung test run; raise this if the suite legitimately outgrows it.
+    timeout-minutes: 60
+    strategy:
+      # Let every Python version finish even when one of them fails.
+      fail-fast: false
+      matrix:
+        # Quoted so YAML keeps the versions as strings rather than floats.
+        python: ["3.11", "3.12", "3.13"]
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python }}
+          cache: pip
+          cache-dependency-path: pyproject.toml
+      - run: pip install --upgrade pip
+      # ``-e .[dev,plotting,excel]`` so every optional extra is exercised.
+      # Gurobi is not installable on free runners; the relevant tests
+      # skip themselves when ``optlang.gurobi_interface`` cannot import.
+      - run: pip install -e ".[dev,plotting,excel]"
+      # Quiet output, stop after 5 failures, report the 20 slowest tests.
+      - run: pytest -q --maxfail=5 --durations=20
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..1f44cf3
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,541 @@
+GNU GENERAL PUBLIC LICENSE
+==========================
+
+Version 3, 29 June 2007
+
+Copyright &copy; 2007 Free Software Foundation, Inc. &lt;<http://fsf.org/>&gt;
+
+Everyone is permitted to copy and distribute verbatim copies of this license
+document, but changing it is not allowed.
+
+## Preamble
+
+The GNU General Public License is a free, copyleft license for software and other
+kinds of works.
+
+The licenses for most software and other practical works are designed to take away
+your freedom to share and change the works. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change all versions of a
+program--to make sure it remains free software for all its users. We, the Free
+Software Foundation, use the GNU General Public License for most of our software; it
+applies also to any other work released this way by its authors. You can apply it to
+your programs, too.
+
+When we speak of free software, we are referring to freedom, not price. Our General
+Public Licenses are designed to make sure that you have the freedom to distribute
+copies of free software (and charge for them if you wish), that you receive source
+code or can get it if you want it, that you can change the software or use pieces of
+it in new free programs, and that you know you can do these things.
+
+To protect your rights, we need to prevent others from denying you these rights or
+asking you to surrender the rights. Therefore, you have certain responsibilities if
+you distribute copies of the software, or if you modify it: responsibilities to
+respect the freedom of others.
+
+For example, if you distribute copies of such a program, whether gratis or for a fee,
+you must pass on to the recipients the same freedoms that you received. You must make
+sure that they, too, receive or can get the source code. And you must show them these
+terms so they know their rights.
+
+Developers that use the GNU GPL protect your rights with two steps: (1) assert
+copyright on the software, and (2) offer you this License giving you legal permission
+to copy, distribute and/or modify it.
+
+For the developers' and authors' protection, the GPL clearly explains that there is
+no warranty for this free software. For both users' and authors' sake, the GPL
+requires that modified versions be marked as changed, so that their problems will not
+be attributed erroneously to authors of previous versions.
+
+Some devices are designed to deny users access to install or run modified versions of
+the software inside them, although the manufacturer can do so. This is fundamentally
+incompatible with the aim of protecting users' freedom to change the software. The
+systematic pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable. Therefore, we have designed
+this version of the GPL to prohibit the practice for those products. If such problems
+arise substantially in other domains, we stand ready to extend this provision to
+those domains in future versions of the GPL, as needed to protect the freedom of
+users.
+
+Finally, every program is threatened constantly by software patents. States should
+not allow patents to restrict development and use of software on general-purpose
+computers, but in those that do, we wish to avoid the special danger that patents
+applied to a free program could make it effectively proprietary. To prevent this, the
+GPL assures that patents cannot be used to render the program non-free.
+
+The precise terms and conditions for copying, distribution and modification follow.
+
+## TERMS AND CONDITIONS
+
+### 0. Definitions.
+
+&ldquo;This License&rdquo; refers to version 3 of the GNU General Public License.
+
+&ldquo;Copyright&rdquo; also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+&ldquo;The Program&rdquo; refers to any copyrightable work licensed under this
+License. Each licensee is addressed as &ldquo;you&rdquo;. &ldquo;Licensees&rdquo; and
+&ldquo;recipients&rdquo; may be individuals or organizations.
+
+To &ldquo;modify&rdquo; a work means to copy from or adapt all or part of the work in
+a fashion requiring copyright permission, other than the making of an exact copy. The
+resulting work is called a &ldquo;modified version&rdquo; of the earlier work or a
+work &ldquo;based on&rdquo; the earlier work.
+
+A &ldquo;covered work&rdquo; means either the unmodified Program or a work based on
+the Program.
+
+To &ldquo;propagate&rdquo; a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for infringement under
+applicable copyright law, except executing it on a computer or modifying a private
+copy. Propagation includes copying, distribution (with or without modification),
+making available to the public, and in some countries other activities as well.
+
+To &ldquo;convey&rdquo; a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user through a computer
+network, with no transfer of a copy, is not conveying.
+
+An interactive user interface displays &ldquo;Appropriate Legal Notices&rdquo; to the
+extent that it includes a convenient and prominently visible feature that (1)
+displays an appropriate copyright notice, and (2) tells the user that there is no
+warranty for the work (except to the extent that warranties are provided), that
+licensees may convey the work under this License, and how to view a copy of this
+License. If the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+### 1. Source Code.
+
+The &ldquo;source code&rdquo; for a work means the preferred form of the work for
+making modifications to it. &ldquo;Object code&rdquo; means any non-source form of a
+work.
+
+A &ldquo;Standard Interface&rdquo; means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of interfaces
+specified for a particular programming language, one that is widely used among
+developers working in that language.
+
+The &ldquo;System Libraries&rdquo; of an executable work include anything, other than
+the work as a whole, that (a) is included in the normal form of packaging a Major
+Component, but which is not part of that Major Component, and (b) serves only to
+enable use of the work with that Major Component, or to implement a Standard
+Interface for which an implementation is available to the public in source code form.
+A &ldquo;Major Component&rdquo;, in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system (if any) on which
+the executable work runs, or a compiler used to produce the work, or an object code
+interpreter used to run it.
+
+The &ldquo;Corresponding Source&rdquo; for a work in object code form means all the
+source code needed to generate, install, and (for an executable work) run the object
+code and to modify the work, including scripts to control those activities. However,
+it does not include the work's System Libraries, or general-purpose tools or
+generally available free programs which are used unmodified in performing those
+activities but which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for the work, and
+the source code for shared libraries and dynamically linked subprograms that the work
+is specifically designed to require, such as by intimate data communication or
+control flow between those subprograms and other parts of the work.
+
+The Corresponding Source need not include anything that users can regenerate
+automatically from other parts of the Corresponding Source.
+
+The Corresponding Source for a work in source code form is that same work.
+
+### 2. Basic Permissions.
+
+All rights granted under this License are granted for the term of copyright on the
+Program, and are irrevocable provided the stated conditions are met. This License
+explicitly affirms your unlimited permission to run the unmodified Program. The
+output from running a covered work is covered by this License only if the output,
+given its content, constitutes a covered work. This License acknowledges your rights
+of fair use or other equivalent, as provided by copyright law.
+
+You may make, run and propagate covered works that you do not convey, without
+conditions so long as your license otherwise remains in force. You may convey covered
+works to others for the sole purpose of having them make modifications exclusively
+for you, or provide you with facilities for running those works, provided that you
+comply with the terms of this License in conveying all material for which you do not
+control copyright. Those thus making or running the covered works for you must do so
+exclusively on your behalf, under your direction and control, on terms that prohibit
+them from making any copies of your copyrighted material outside their relationship
+with you.
+
+Conveying under any other circumstances is permitted solely under the conditions
+stated below. Sublicensing is not allowed; section 10 makes it unnecessary.
+
+### 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+No covered work shall be deemed part of an effective technological measure under any
+applicable law fulfilling obligations under article 11 of the WIPO copyright treaty
+adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention
+of such measures.
+
+When you convey a covered work, you waive any legal power to forbid circumvention of
+technological measures to the extent such circumvention is effected by exercising
+rights under this License with respect to the covered work, and you disclaim any
+intention to limit operation or modification of the work as a means of enforcing,
+against the work's users, your or third parties' legal rights to forbid circumvention
+of technological measures.
+
+### 4. Conveying Verbatim Copies.
+
+You may convey verbatim copies of the Program's source code as you receive it, in any
+medium, provided that you conspicuously and appropriately publish on each copy an
+appropriate copyright notice; keep intact all notices stating that this License and
+any non-permissive terms added in accord with section 7 apply to the code; keep
+intact all notices of the absence of any warranty; and give all recipients a copy of
+this License along with the Program.
+
+You may charge any price or no price for each copy that you convey, and you may offer
+support or warranty protection for a fee.
+
+### 5. Conveying Modified Source Versions.
+
+You may convey a work based on the Program, or the modifications to produce it from
+the Program, in the form of source code under the terms of section 4, provided that
+you also meet all of these conditions:
+
+* **a)** The work must carry prominent notices stating that you modified it, and giving a
+relevant date.
+* **b)** The work must carry prominent notices stating that it is released under this
+License and any conditions added under section 7. This requirement modifies the
+requirement in section 4 to &ldquo;keep intact all notices&rdquo;.
+* **c)** You must license the entire work, as a whole, under this License to anyone who
+comes into possession of a copy. This License will therefore apply, along with any
+applicable section 7 additional terms, to the whole of the work, and all its parts,
+regardless of how they are packaged. This License gives no permission to license the
+work in any other way, but it does not invalidate such permission if you have
+separately received it.
+* **d)** If the work has interactive user interfaces, each must display Appropriate Legal
+Notices; however, if the Program has interactive interfaces that do not display
+Appropriate Legal Notices, your work need not make them do so.
+
+A compilation of a covered work with other separate and independent works, which are
+not by their nature extensions of the covered work, and which are not combined with
+it such as to form a larger program, in or on a volume of a storage or distribution
+medium, is called an &ldquo;aggregate&rdquo; if the compilation and its resulting
+copyright are not used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work in an aggregate
+does not cause this License to apply to the other parts of the aggregate.
+
+### 6. Conveying Non-Source Forms.
+
+You may convey a covered work in object code form under the terms of sections 4 and
+5, provided that you also convey the machine-readable Corresponding Source under the
+terms of this License, in one of these ways:
+
+* **a)** Convey the object code in, or embodied in, a physical product (including a
+physical distribution medium), accompanied by the Corresponding Source fixed on a
+durable physical medium customarily used for software interchange.
+* **b)** Convey the object code in, or embodied in, a physical product (including a
+physical distribution medium), accompanied by a written offer, valid for at least
+three years and valid for as long as you offer spare parts or customer support for
+that product model, to give anyone who possesses the object code either (1) a copy of
+the Corresponding Source for all the software in the product that is covered by this
+License, on a durable physical medium customarily used for software interchange, for
+a price no more than your reasonable cost of physically performing this conveying of
+source, or (2) access to copy the Corresponding Source from a network server at no
+charge.
+* **c)** Convey individual copies of the object code with a copy of the written offer to
+provide the Corresponding Source. This alternative is allowed only occasionally and
+noncommercially, and only if you received the object code with such an offer, in
+accord with subsection 6b.
+* **d)** Convey the object code by offering access from a designated place (gratis or for
+a charge), and offer equivalent access to the Corresponding Source in the same way
+through the same place at no further charge. You need not require recipients to copy
+the Corresponding Source along with the object code. If the place to copy the object
+code is a network server, the Corresponding Source may be on a different server
+(operated by you or a third party) that supports equivalent copying facilities,
+provided you maintain clear directions next to the object code saying where to find
+the Corresponding Source. Regardless of what server hosts the Corresponding Source,
+you remain obligated to ensure that it is available for as long as needed to satisfy
+these requirements.
+* **e)** Convey the object code using peer-to-peer transmission, provided you inform
+other peers where the object code and Corresponding Source of the work are being
+offered to the general public at no charge under subsection 6d.
+
+A separable portion of the object code, whose source code is excluded from the
+Corresponding Source as a System Library, need not be included in conveying the
+object code work.
+
+A &ldquo;User Product&rdquo; is either (1) a &ldquo;consumer product&rdquo;, which
+means any tangible personal property which is normally used for personal, family, or
+household purposes, or (2) anything designed or sold for incorporation into a
+dwelling. In determining whether a product is a consumer product, doubtful cases
+shall be resolved in favor of coverage. For a particular product received by a
+particular user, &ldquo;normally used&rdquo; refers to a typical or common use of
+that class of product, regardless of the status of the particular user or of the way
+in which the particular user actually uses, or expects or is expected to use, the
+product. A product is a consumer product regardless of whether the product has
+substantial commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+&ldquo;Installation Information&rdquo; for a User Product means any methods,
+procedures, authorization keys, or other information required to install and execute
+modified versions of a covered work in that User Product from a modified version of
+its Corresponding Source. The information must suffice to ensure that the continued
+functioning of the modified object code is in no case prevented or interfered with
+solely because modification has been made.
+
+If you convey an object code work under this section in, or with, or specifically for
+use in, a User Product, and the conveying occurs as part of a transaction in which
+the right of possession and use of the User Product is transferred to the recipient
+in perpetuity or for a fixed term (regardless of how the transaction is
+characterized), the Corresponding Source conveyed under this section must be
+accompanied by the Installation Information. But this requirement does not apply if
+neither you nor any third party retains the ability to install modified object code
+on the User Product (for example, the work has been installed in ROM).
+
+The requirement to provide Installation Information does not include a requirement to
+continue to provide support service, warranty, or updates for a work that has been
+modified or installed by the recipient, or for the User Product in which it has been
+modified or installed. Access to a network may be denied when the modification itself
+materially and adversely affects the operation of the network or violates the rules
+and protocols for communication across the network.
+
+Corresponding Source conveyed, and Installation Information provided, in accord with
+this section must be in a format that is publicly documented (and with an
+implementation available to the public in source code form), and must require no
+special password or key for unpacking, reading or copying.
+
+### 7. Additional Terms.
+
+&ldquo;Additional permissions&rdquo; are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions. Additional
+permissions that are applicable to the entire Program shall be treated as though they
+were included in this License, to the extent that they are valid under applicable
+law. If additional permissions apply only to part of the Program, that part may be
+used separately under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+When you convey a copy of a covered work, you may at your option remove any
+additional permissions from that copy, or from any part of it. (Additional
+permissions may be written to require their own removal in certain cases when you
+modify the work.) You may place additional permissions on material, added by you to a
+covered work, for which you have or can give appropriate copyright permission.
+
+Notwithstanding any other provision of this License, for material you add to a
+covered work, you may (if authorized by the copyright holders of that material)
+supplement the terms of this License with terms:
+
+* **a)** Disclaiming warranty or limiting liability differently from the terms of
+sections 15 and 16 of this License; or
+* **b)** Requiring preservation of specified reasonable legal notices or author
+attributions in that material or in the Appropriate Legal Notices displayed by works
+containing it; or
+* **c)** Prohibiting misrepresentation of the origin of that material, or requiring that
+modified versions of such material be marked in reasonable ways as different from the
+original version; or
+* **d)** Limiting the use for publicity purposes of names of licensors or authors of the
+material; or
+* **e)** Declining to grant rights under trademark law for use of some trade names,
+trademarks, or service marks; or
+* **f)** Requiring indemnification of licensors and authors of that material by anyone
+who conveys the material (or modified versions of it) with contractual assumptions of
+liability to the recipient, for any liability that these contractual assumptions
+directly impose on those licensors and authors.
+
+All other non-permissive additional terms are considered &ldquo;further
+restrictions&rdquo; within the meaning of section 10. If the Program as you received
+it, or any part of it, contains a notice stating that it is governed by this License
+along with a term that is a further restriction, you may remove that term. If a
+license document contains a further restriction but permits relicensing or conveying
+under this License, you may add to a covered work material governed by the terms of
+that license document, provided that the further restriction does not survive such
+relicensing or conveying.
+
+If you add terms to a covered work in accord with this section, you must place, in
+the relevant source files, a statement of the additional terms that apply to those
+files, or a notice indicating where to find the applicable terms.
+
+Additional terms, permissive or non-permissive, may be stated in the form of a
+separately written license, or stated as exceptions; the above requirements apply
+either way.
+
+### 8. Termination.
+
+You may not propagate or modify a covered work except as expressly provided under
+this License. Any attempt otherwise to propagate or modify it is void, and will
+automatically terminate your rights under this License (including any patent licenses
+granted under the third paragraph of section 11).
+
+However, if you cease all violation of this License, then your license from a
+particular copyright holder is reinstated (a) provisionally, unless and until the
+copyright holder explicitly and finally terminates your license, and (b) permanently,
+if the copyright holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+Moreover, your license from a particular copyright holder is reinstated permanently
+if the copyright holder notifies you of the violation by some reasonable means, this
+is the first time you have received notice of violation of this License (for any
+work) from that copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+Termination of your rights under this section does not terminate the licenses of
+parties who have received copies or rights from you under this License. If your
+rights have been terminated and not permanently reinstated, you do not qualify to
+receive new licenses for the same material under section 10.
+
+### 9. Acceptance Not Required for Having Copies.
+
+You are not required to accept this License in order to receive or run a copy of the
+Program. Ancillary propagation of a covered work occurring solely as a consequence of
+using peer-to-peer transmission to receive a copy likewise does not require
+acceptance. However, nothing other than this License grants you permission to
+propagate or modify any covered work. These actions infringe copyright if you do not
+accept this License. Therefore, by modifying or propagating a covered work, you
+indicate your acceptance of this License to do so.
+
+### 10. Automatic Licensing of Downstream Recipients.
+
+Each time you convey a covered work, the recipient automatically receives a license
+from the original licensors, to run, modify and propagate that work, subject to this
+License. You are not responsible for enforcing compliance by third parties with this
+License.
+
+An &ldquo;entity transaction&rdquo; is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an organization, or
+merging organizations. If propagation of a covered work results from an entity
+transaction, each party to that transaction who receives a copy of the work also
+receives whatever licenses to the work the party's predecessor in interest had or
+could give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if the predecessor
+has it or can get it with reasonable efforts.
+
+You may not impose any further restrictions on the exercise of the rights granted or
+affirmed under this License. For example, you may not impose a license fee, royalty,
+or other charge for exercise of rights granted under this License, and you may not
+initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging
+that any patent claim is infringed by making, using, selling, offering for sale, or
+importing the Program or any portion of it.
+
+### 11. Patents.
+
+A &ldquo;contributor&rdquo; is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The work thus
+licensed is called the contributor's &ldquo;contributor version&rdquo;.
+
+A contributor's &ldquo;essential patent claims&rdquo; are all patent claims owned or
+controlled by the contributor, whether already acquired or hereafter acquired, that
+would be infringed by some manner, permitted by this License, of making, using, or
+selling its contributor version, but do not include claims that would be infringed
+only as a consequence of further modification of the contributor version. For
+purposes of this definition, &ldquo;control&rdquo; includes the right to grant patent
+sublicenses in a manner consistent with the requirements of this License.
+
+Each contributor grants you a non-exclusive, worldwide, royalty-free patent license
+under the contributor's essential patent claims, to make, use, sell, offer for sale,
+import and otherwise run, modify and propagate the contents of its contributor
+version.
+
+In the following three paragraphs, a &ldquo;patent license&rdquo; is any express
+agreement or commitment, however denominated, not to enforce a patent (such as an
+express permission to practice a patent or covenant not to sue for patent
+infringement). To &ldquo;grant&rdquo; such a patent license to a party means to make
+such an agreement or commitment not to enforce a patent against the party.
+
+If you convey a covered work, knowingly relying on a patent license, and the
+Corresponding Source of the work is not available for anyone to copy, free of charge
+and under the terms of this License, through a publicly available network server or
+other readily accessible means, then you must either (1) cause the Corresponding
+Source to be so available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner consistent with
+the requirements of this License, to extend the patent license to downstream
+recipients. &ldquo;Knowingly relying&rdquo; means you have actual knowledge that, but
+for the patent license, your conveying the covered work in a country, or your
+recipient's use of the covered work in a country, would infringe one or more
+identifiable patents in that country that you have reason to believe are valid.
+
+If, pursuant to or in connection with a single transaction or arrangement, you
+convey, or propagate by procuring conveyance of, a covered work, and grant a patent
+license to some of the parties receiving the covered work authorizing them to use,
+propagate, modify or convey a specific copy of the covered work, then the patent
+license you grant is automatically extended to all recipients of the covered work and
+works based on it.
+
+A patent license is &ldquo;discriminatory&rdquo; if it does not include within the
+scope of its coverage, prohibits the exercise of, or is conditioned on the
+non-exercise of one or more of the rights that are specifically granted under this
+License. You may not convey a covered work if you are a party to an arrangement with
+a third party that is in the business of distributing software, under which you make
+payment to the third party based on the extent of your activity of conveying the
+work, and under which the third party grants, to any of the parties who would receive
+the covered work from you, a discriminatory patent license (a) in connection with
+copies of the covered work conveyed by you (or copies made from those copies), or (b)
+primarily for and in connection with specific products or compilations that contain
+the covered work, unless you entered into that arrangement, or that patent license
+was granted, prior to 28 March 2007.
+
+Nothing in this License shall be construed as excluding or limiting any implied
+license or other defenses to infringement that may otherwise be available to you
+under applicable patent law.
+
+### 12. No Surrender of Others' Freedom.
+
+If conditions are imposed on you (whether by court order, agreement or otherwise)
+that contradict the conditions of this License, they do not excuse you from the
+conditions of this License. If you cannot convey a covered work so as to satisfy
+simultaneously your obligations under this License and any other pertinent
+obligations, then as a consequence you may not convey it at all. For example, if you
+agree to terms that obligate you to collect a royalty for further conveying from
+those to whom you convey the Program, the only way you could satisfy both those terms
+and this License would be to refrain entirely from conveying the Program.
+
+### 13. Use with the GNU Affero General Public License.
+
+Notwithstanding any other provision of this License, you have permission to link or
+combine any covered work with a work licensed under version 3 of the GNU Affero
+General Public License into a single combined work, and to convey the resulting work.
+The terms of this License will continue to apply to the part which is the covered
+work, but the special requirements of the GNU Affero General Public License, section
+13, concerning interaction through a network will apply to the combination as such.
+
+### 14. Revised Versions of this License.
+
+The Free Software Foundation may publish revised and/or new versions of the GNU
+General Public License from time to time. Such new versions will be similar in spirit
+to the present version, but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program specifies that
+a certain numbered version of the GNU General Public License &ldquo;or any later
+version&rdquo; applies to it, you have the option of following the terms and
+conditions either of that numbered version or of any later version published by the
+Free Software Foundation. If the Program does not specify a version number of the GNU
+General Public License, you may choose any version ever published by the Free
+Software Foundation.
+
+If the Program specifies that a proxy can decide which future versions of the GNU
+General Public License can be used, that proxy's public statement of acceptance of a
+version permanently authorizes you to choose that version for the Program.
+
+Later license versions may give you additional or different permissions. However, no
+additional obligations are imposed on any author or copyright holder as a result of
+your choosing to follow a later version.
+
+### 15. Disclaimer of Warranty.
+
+THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM &ldquo;AS IS&rdquo; WITHOUT WARRANTY OF ANY KIND, EITHER
+EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE
+QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE
+DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+### 16. Limitation of Liability.
+
+IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY
+COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS
+PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL,
+INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE
+OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE
+WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+### 17. Interpretation of Sections 15 and 16.
+
+If the disclaimer of warranty and limitation of liability provided above cannot be
+given local legal effect according to their terms, reviewing courts shall apply local
+law that most closely approximates an absolute waiver of all civil liability in
+connection with the Program, unless a warranty or assumption of liability accompanies
+a copy of the Program in return for a fee.
\ No newline at end of file
diff --git a/README.md b/README.md
index c8f3d64..69111be 100644
--- a/README.md
+++ b/README.md
@@ -1,15 +1,82 @@
 # raven-python
 
-The Python counterpart of the
-[RAVEN Toolbox 2](https://github.com/SysBioChalmers/RAVEN) (MATLAB), built on
-[cobrapy](https://github.com/opencobra/cobrapy).
-
-`raven-python` covers de-novo reconstruction (KEGG + protein homology),
-context-specific model extraction (`tINIT` / `ftINIT`), metabolic-task
-validation, gap-filling, omics ingestion, sub-cellular localisation, model
-manipulation, and YAML / SIF / Excel I/O — preserving the established RAVEN
-workflows in a Python-native form.
-
-This `main` branch is intentionally empty. Development happens on the
-`develop` branch via a series of feature branches; see the open and merged
-pull requests for the current state of the port.
+[![CI](https://github.com/SysBioChalmers/raven-python/actions/workflows/ci.yml/badge.svg)](https://github.com/SysBioChalmers/raven-python/actions/workflows/ci.yml)
+
+**Reconstruction, Analysis and Visualisation of Metabolic Networks — in Python.**
+
+`raven-python` is the Python counterpart of the
+[RAVEN Toolbox 2](https://github.com/SysBioChalmers/RAVEN) (MATLAB). It builds on
+[**cobrapy**](https://github.com/opencobra/cobrapy) for everything cobrapy already does
+well (simulation, standard analyses, SBML I/O, model manipulation) and adds the
+functionality that's unique to RAVEN:
+
+* **De novo reconstruction** from KEGG and protein homology (BLAST / DIAMOND).
+* **Context-specific models** from omics data via **tINIT / ftINIT**, with task-aware
+  gap-filling and the linear-merge MILP reduction.
+* **Metabolic-task** validation (`check_tasks`, `fitTasks`).
+* **Connectivity gap-filling** against template models.
+* **Omics integration** — Human Protein Atlas (proteomics + RNA-seq) ingestion.
+* **Sub-cellular localisation** prediction by MILP, with partial-update mode and
+  pluggable predictors (WoLF PSORT, DeepLoc, …).
+* **N-model comparison**; **reporter metabolites**; **FSEOF**; **flux sampling**.
+* **YAML I/O** following the cobra standard, plus geckopy's `ec-*` enzyme-constrained
+  fields. **SIF** export. **RAVEN-style Excel** export.
+
+The status of every RAVEN function (ported, cheatsheet-mapped to cobra, or explicitly
+not ported) is documented function-by-function in
+**[docs/raven_migration.md](docs/raven_migration.md)**.
+
+## Design principle
+
+The canonical in-memory object is always a [`cobra.Model`](https://cobrapy.readthedocs.io).
+There is no parallel RAVEN struct, no `ravenCobraWrapper`-style adapter. RAVEN-specific
+fields that cobra doesn't model natively (`rxnMiriams`, `metDeltaG`,
+`rxnConfidenceScores`, …) live in cobra's `annotation` / `notes` dictionaries. This
+avoids duplicating cobra's data model and keeps raven-python interoperable with the wider
+COBRA ecosystem.
+
+## Status
+
+raven-python has been validated against MATLAB RAVEN on **Human-GEM** (5 Hart2015 cell-line
+models, reaction-set Jaccard 0.978–0.980 with task constraints and 0.973–0.977 without — see
+[docs/humangem_validation.md](docs/humangem_validation.md)).
+The functional scope of the original RAVEN toolbox is covered with two principled
+omissions:
+
+* **MetaCyc-based reconstruction** is not implemented and is flagged for removal from
+  MATLAB RAVEN as well — see [IMPROVEMENTS.md](IMPROVEMENTS.md) under `R-MetaCyc`.
+* **Dynamic FBA** is not implemented — several maintained Python packages already cover
+  it ([`dfba`](https://pypi.org/project/dfba/), [`reframed`](https://pypi.org/project/reframed/),
+  [`mewpy`](https://pypi.org/project/mewpy/)).
+
+What's still open is catalogued in **[docs/todo.md](docs/todo.md)** (visualisation / Phase
+6 is the main item).
+
+## Installation (development)
+
+```bash
+git clone https://github.com/SysBioChalmers/raven-python
+cd raven-python
+pip install -e ".[dev]"
+```
+
+raven-python requires Python ≥ 3.11. Genome-scale (f)tINIT MILPs currently require **Gurobi**
+([details on solver portability](docs/init_solver_benchmark.md)); toy and unit-test work
+runs on the open-source GLPK.
+
+## Documentation
+
+See **[docs/README.md](docs/README.md)** for the documentation index.
+
+## Relationship to MATLAB RAVEN
+
+`raven-python` is a derivative work and is released under the same **GPL-3.0-or-later**
+license. If you use it in scientific work, please cite the RAVEN 2 paper:
+
+> Wang H, Marcišauskas S, Sánchez BJ, Domenzain I, Hermansson D, Agren R, Nielsen J,
+> Kerkhoven EJ. (2018) RAVEN 2.0: A versatile toolbox for metabolic network
+> reconstruction and a case study on *Streptomyces coelicolor*. PLoS Comput Biol 14(10):
+> e1006541.
+
+## License
+
+[GPL-3.0-or-later](LICENSE)
diff --git a/docs/humangem_validation.md b/docs/humangem_validation.md
new file mode 100644
index 0000000..bbaae6b
--- /dev/null
+++ b/docs/humangem_validation.md
@@ -0,0 +1,117 @@
+# Human-GEM cell-type model validation: raven-python vs RAVEN
+
+Validation of raven-python's tINIT/ftINIT against MATLAB RAVEN on a real genome-scale
+reconstruction (Human-GEM) using the Hart2015 RNA-seq dataset (5 cell lines: DLD1,
+GBM, HCT116, HELA, RPE1). The goal is functional equivalence — do raven-python and RAVEN
+extract the *same* context-specific reaction sets from the same inputs?
+
+## Method
+
+* **Template & inputs.** RAVEN built the ftINIT reference model from Human-GEM
+  (`prepHumanModelForftINIT`: remove drug/exchange/artificial reactions, set
+  spontaneous/custom lists) and exported it as `raven_refModel.xml` (10198 reactions).
+  raven-python builds on that *same* exported model, so the candidate reaction universe is
+  identical and set comparison is exact.
+* **Scoring.** Gene scores from `log2(TPM+1)`-style expression via
+  `gene_scores_from_expression`, mapped to reactions through the GPR
+  (`score_reactions_from_genes`), matching RAVEN's `getExprForRxnScore`.
+* **ftINIT.** Series `1+1` (2 staged MILP steps). RAVEN run via `ftINIT.m` with Gurobi;
+  raven-python via `raven_python.init.ftinit` with Gurobi (`mip_gap=0.001`, `time_limit=600`).
+* **tINIT.** raven-python `get_init_model` (classic single-MILP INIT) on HCT116, compared to
+  the ftINIT result for the same cell line.
+* **Tasks.** Two raven-python ftINIT variants: *no-task* (expression only) and
+  *task-constrained* (essential metabolic tasks, `metabolicTasks_Essential.txt`, force
+  task-essential reactions to be kept). RAVEN's reference is task-constrained.
+* **Solver.** Gurobi 13.0.1 for both tools.
+
+## Engineering findings (raven-python tractability)
+
+Getting ftINIT to run at genome scale surfaced three issues, all now fixed and matching
+RAVEN's design:
+
+1. **O(n²) constraint construction.** Building the steady-state balances with Python
+   `sum()` re-canonicalises a growing sympy expression at each term; hub metabolites
+   (ATP/H⁺/H₂O in ~10³ reactions) made one constraint take ~minutes (≈154 s total build,
+   benchmark: 1500-term `sum` = 59 s vs `optlang.symbolics.add` = 0.01 s). Fixed by
+   building flat term lists once per reaction and summing with `optlang.symbolics.add`
+   (in both ftINIT and tINIT).
+2. **Big-M too loose.** The on/off indicator constraints used each reaction's own bound
+   (±1000) as big-M; with `force_on=0.1` that is a ~10⁴ ratio → very weak LP relaxation
+   → Gurobi never closes the gap. RAVEN uses a fixed big-M = 100. Adopted.
+3. **Stoichiometric rescaling.** A fixed big-M=100 is only valid if no reaction needs
+   flux ≫100; ported RAVEN's `rescaleModelForINIT` (cap each reaction's coefficient
+   dynamic range at 25×, normalise mean |coeff| to 1) into `prep_init_model`. Without it
+   the staged MILP is infeasible (step-1 caps transports that step-0 used freely).
+
+Net effect: a full ftINIT cell-line solve went from *not finishing* to ~200 s,
+comparable to RAVEN.
+
+## Results
+
+### Reaction counts
+
+| cell line | RAVEN ftINIT | raven-python ftINIT (no-task) | raven-python ftINIT (task) |
+|-----------|-------------:|--------------------------:|-----------------------:|
+| DLD1      | 7782 | 7744 | 7774 |
+| GBM       | 7668 | 7667 | 7680 |
+| HCT116    | 7780 | 7752 | 7776 |
+| HELA      | 7832 | 7789 | 7816 |
+| RPE1      | 7569 | 7564 | 7570 |
+
+Counts agree within ~0.5 % everywhere; the task-constrained run is closest (e.g. RPE1
+7570 vs 7569, HCT116 7776 vs 7780). raven-python tINIT (HCT116) gives 6024 reactions — a
+smaller model, as expected from the different (classic INIT) objective.
+
+### Agreement — raven-python (no-task) ftINIT vs RAVEN ftINIT
+
+| cell line | shared | only raven-python | only RAVEN | Jaccard |
+|-----------|-------:|--------------:|-----------:|--------:|
+| DLD1   | 7667 |  77 | 115 | 0.976 |
+| GBM    | 7562 | 105 | 106 | 0.973 |
+| HCT116 | 7675 |  77 | 105 | 0.977 |
+| HELA   | 7707 |  82 | 125 | 0.974 |
+| RPE1   | 7470 |  94 |  99 | 0.975 |
+
+**~97.5 % of reactions are identical** between the two independent implementations, even
+though this run is *expression-only* while RAVEN's reference is task-constrained. The
+"only RAVEN" surplus (≈99–125) is expected to include task-essential reactions that the
+task-constrained run (below) recovers.
+
+### Agreement — raven-python (task-constrained) ftINIT vs RAVEN ftINIT
+
+| cell line | shared | only raven-python | only RAVEN | Jaccard |
+|-----------|-------:|--------------:|-----------:|--------:|
+| DLD1   | 7699 | 75 | 83 | 0.980 |
+| GBM    | 7588 | 92 | 80 | 0.978 |
+| HCT116 | 7696 | 80 | 84 | 0.979 |
+| HELA   | 7735 | 81 | 97 | 0.978 |
+| RPE1   | 7493 | 77 | 76 | 0.980 |
+
+Adding the essential metabolic tasks (same task list RAVEN uses) raises agreement to
+**Jaccard 0.978–0.980** and makes the disagreement symmetric (only-raven-python ≈ only-RAVEN
+≈ 80), confirming the prediction: the task constraints recover RAVEN's task-essential
+reactions. The residual ≈80 reactions each way out of ~7700 is at the level expected from
+MIP-gap tolerance (both accept near-optimal incumbents) and alternate optima.
+
+### raven-python tINIT vs ftINIT (HCT116)
+
+tINIT 6024 rxns vs ftINIT 7752; shared 5957, Jaccard 0.762. tINIT is nearly a subset
+(only 67 reactions unique to it) — the two methods agree on a common core, with ftINIT
+keeping more (its staged formulation and task handling are less aggressive at removal).
+This matches the expected tINIT/ftINIT relationship rather than indicating a defect.
+
+## Conclusions
+
+From identical inputs on a genome-scale human reconstruction, raven-python reproduces RAVEN's
+ftINIT reaction selection to **97.5 % (no-task) and 98 % (task-constrained) set identity**
+across five cell lines — strong evidence of functional equivalence between the two
+independent implementations. Agreement is symmetric and the residual (~80 reactions each
+way) is consistent with MIP near-optimality and alternate optima rather than any
+systematic divergence.
+
+Reaching genome-scale tractability required matching RAVEN's numerical-conditioning
+choices and fixing optlang-specific construction costs (see *Engineering findings*):
+fixed big-M = 100, `rescaleModelForINIT`, `optlang.symbolics.add` instead of Python
+`sum()` in every MILP build (ftINIT, tINIT, and the gap-fill). With these, a
+task-constrained cell-line model builds in ~15–25 min (dominated by the
+essential-forced staged MILP) and a no-task one in ~3 min, comparable to RAVEN.
diff --git a/docs/init_param_calibration.md b/docs/init_param_calibration.md
new file mode 100644
index 0000000..cc69314
--- /dev/null
+++ b/docs/init_param_calibration.md
@@ -0,0 +1,342 @@
+# (f)tINIT parameter calibration & input-robustness
+
+Empirical study of raven-python's (f)tINIT parameters on a genome-scale model (Human-GEM,
+Hart2015 / HCT116). Two questions:
+
+1. **Calibration** — on clean data, which parameter values give the best speed/quality
+   trade-off? (`scripts/analyze_init_params.py`)
+2. **Robustness** — with the task layer always on (it is part of the pipeline, not a
+   variable), how does degrading the *transcriptomics input* affect the model, and which
+   parameters keep it functional and stable? (`scripts/analyze_init_robustness.py`)
+
+Both scripts are resumable and reusable on any model/dataset; the numbers below are HCT116.
+"Jaccard" is reaction-set overlap with the reference (tightest setting / clean data) — for
+a model-extraction tool the reaction set is the product, and a MIP gap bounds only the
+*objective*, so set-stability is tracked separately.
+
+---
+
+## 1. Clean-data calibration
+
+### ftINIT MILP — `mip_gap` (single step-0 solve, big_m=100, force_on=0.1)
+
+| mip_gap | time (s) | objective | rel.obj.gap | Jaccard vs tightest |
+|--------:|---------:|----------:|------------:|--------------------:|
+| 0.0002 | 48 | 49357 | ref | ref |
+| 0.001 | 44 | 49357 | +0.0000 | **1.0000** |
+| 0.003 | 42 | 49289 | +0.0014 | 0.9973 |
+| 0.01 | 42 | 49185 | +0.0035 | 0.9935 |
+| 0.03 | 52 | 49185 | +0.0035 | 0.9935 |
+| 0.1 | 46 | 45615 | +0.0758 | 0.9469 |
+
+**Solve time is essentially flat across the gap** (the model build dominates), so a tight
+gap is nearly free. `mip_gap=0.001` reproduces the proven optimum exactly (Jaccard 1.0);
+quality only collapses at 0.1. → **Default 0.001.** (The genome-scale staged pipeline still
+needs *some* gap + a `time_limit` because the full essential-forced MILP can be much harder
+than this single step — see robustness timings.)
+
+### ftINIT MILP — `big_m` (gap=0.001, force_on=0.1)
+
+| big_m | time (s) | rel.obj.gap | Jaccard vs big_m=100 |
+|------:|---------:|------------:|---------------------:|
+| 100 | 51 | ref | ref |
+| 50 | 54 | +0.0006 | 0.983 |
+| 25 | 53 | +0.0007 | 0.982 |
+| 250 | 55 | +0.0005 | 0.984 |
+| 1000 | 59 | +0.0001 | 0.986 |
+
+At step-0 (on the *scaled* model) `big_m` barely affects objective or time, but shifts which
+reactions are kept by ~2% (alternate optima). `big_m=100` is RAVEN's value and is required
+for the *staged* pipeline to stay feasible (a fixed 100 is only valid with stoichiometric
+rescaling — see §1.4). → **Default 100.**
+
+### ftINIT MILP — `force_on` (gap=0.001, big_m=100)
+
+| force_on | time (s) | rel.obj.gap | Jaccard vs 0.1 |
+|---------:|---------:|------------:|---------------:|
+| 0.1 | 63 | ref | ref |
+| 0.02 | 69 | +0.0005 | 0.983 |
+| 0.05 | 56 | +0.0000 | 0.990 |
+| 0.2 | 59 | +0.0004 | 0.982 |
+| 0.5 | 79 | +0.0005 | 0.985 |
+
+`force_on` (minimum flux for a reaction to count as "on") changes the *model*, not just a
+tolerance, but the reaction set is fairly insensitive (Jaccard ≥0.98) and the objective
+hardly moves. → **Default 0.1** (RAVEN), no strong reason to change.
+
+### prep scaling — `rescaleModelForINIT` `max_stoich_diff` and on/off (gap=0.001, big_m=100)
+
+| config | time (s) | rel.obj.gap | Jaccard vs scaled msd=25 |
+|--------|---------:|------------:|-------------------------:|
+| scale on, msd=25 | 51 | ref | ref |
+| msd=10 | 49 | +0.0075 | 0.989 |
+| msd=50 | 61 | +0.0003 | 0.982 |
+| msd=100 | 62 | −0.0001 | 0.986 |
+| scale off | 45 | +0.0129 | 0.973 |
+
+At step-0 even `scale=off` is feasible, but it drifts most (Jaccard 0.973, objective +1.3%);
+`max_stoich_diff` 10–100 are all within ~1%. **This understates scaling's importance** — at
+step-0 there is no big-M cap on the held-out transports. In the *full staged pipeline*,
+`scale=off` with `big_m=100` is **infeasible** (step-1 caps transports that step-0 used
+freely). → **Keep scaling on, msd=25** (RAVEN's default).
+
+**Calibration summary (defaults are well-chosen):** `mip_gap=0.001`, `big_m=100`,
+`force_on=0.1`, scaling on (`max_stoich_diff=25`). For the genome-scale staged pipeline also
+set a `time_limit` (≈120–600 s/step) so a hard essential-forced step returns a near-optimal
+incumbent rather than grinding.
+
+### tINIT MILP (`get_init_model`, `essential_rxns=[]`, `time_limit=400s`)
+
+**mip_gap** (eps=1, prod_weight=0.5):
+
+| mip_gap | time (s) | n_kept | Jaccard vs gap=0.001 |
+|--------:|---------:|-------:|---------------------:|
+| 0.001 | 901 | 6024 | ref |
+| 0.003 | 869 | 6036 | 0.991 |
+| 0.01 | 595 | 5967 | 0.968 |
+
+Tightening the gap costs ~50% more wall time on this MILP (unlike ftINIT step-0, build
+doesn't dominate); a 1% gap is ~30% faster with ~3% reaction-set drift.
+→ **`mip_gap=0.001`** for stability, **0.01** for a faster looser solve.
+
+**eps** (gap=0.005, the connectivity-flux threshold — *changes the model*):
+
+| eps | n_kept | Jaccard vs eps=1.0 |
+|----:|-------:|-------------------:|
+| 0.1 | 6119 | 0.952 |
+| 0.5 | 6123 | 0.952 |
+| 1.0 | 6064 | ref |
+| 2.0 | 6090 | 0.960 |
+
+Each `eps` value gives a slightly different model (Jaccard ~0.95 across the range); the
+reaction-set spread is ~5%. `eps=1.0` is RAVEN's default; smaller values produce *slightly*
+larger models (loosen the connectivity bar). Pick by what the data justifies — see the
+caveat at the top of `init.py`.
+
+**prod_weight** (gap=0.005, the metabolite-production reward — *changes the model*):
+
+| prod_weight | n_kept | Jaccard vs 0.5 |
+|------------:|-------:|---------------:|
+| 0.0 | 5973 | 0.961 |
+| 0.25 | 6015 | 0.974 |
+| 0.5 | 6064 | ref |
+| 1.0 | 6106 | 0.955 |
+
+A higher `prod_weight` keeps slightly more reactions (rewards more connectivity); `0.5`
+(RAVEN's default) is the middle of the range. Effect is modest (~5%).
+
+**big_m** (gap=0.005, `None` = per-reaction `ub`):
+
+| big_m | n_kept | Jaccard vs None |
+|------:|-------:|----------------:|
+| None (per-rxn ub) | 6064 | ref |
+| 1000 | 6064 | **1.000** |
+| 250 | 6114 | 0.953 |
+| 100 | 6023 | 0.930 |
+
+`big_m=1000` is identical to `big_m=None` here because the model's `ub` is ±1000 already
+(so the per-reaction cap *is* 1000). Smaller fixed caps (250, 100) shift alternate optima
+by 5–7% but do not change the objective. Unlike ftINIT, tINIT has *not* been run through
+`rescaleModelForINIT`, so dropping `big_m` below 1000 may invalidate the LP feasibility
+region for reactions whose own bound is larger — keep the default (per-reaction `ub`).
+
+**tINIT calibration summary:** `mip_gap=0.001` (or 0.01 for ~30% speedup at ~3% drift);
+`eps`, `prod_weight`, `big_m` defaults are fine — they all change the *model*, not just
+tolerance, so tune by what the data and biology call for, not by these tables.
+
+### ftINIT full pipeline (`ftinit`, series='1+1', no-task scaled prep, `time_limit=600s`)
+
+| config | time (s) | n_kept | Jaccard vs gap=0.001 |
+|--------|---------:|-------:|---------------------:|
+| **mip_gap=0.001** (default big_m=100) | 346 | 7752 | ref |
+| mip_gap=0.003 | 288 | 7748 | 0.993 |
+| mip_gap=0.01 | 218 | 7746 | **0.995** |
+| big_m=50 (gap=0.003) | 738 | 7799 | 0.974 |
+| big_m=250 (gap=0.003) | 345 | 7766 | 0.977 |
+
+Unlike the single-step ftINIT MILP in §1.1 (where build time dominated and the gap was
+free), **the full pipeline does benefit from a looser gap**: `mip_gap=0.01` is ~37 %
+faster than `0.001` with Jaccard 0.995 — essentially the same model. → **For genome-scale
+ftINIT, `mip_gap=0.01` (or 0.005) is the sweet spot**; keep 0.001 only if exact
+reproducibility matters more than a few minutes.
+
+`big_m=50` is actually *slower* than the default 100 (738s vs 346s) — a tighter cap makes
+the LP relaxation harder for borderline reactions; `big_m=250` is the same speed as 100
+but shifts the reaction set ~2 %. → **Keep `big_m=100`** (RAVEN's value, what scaling is
+designed for).
+
+### tINIT + many task-essential reactions: a structural limitation
+
+ftINIT's task layer (gap-fill) and tINIT's task layer (forcing `essential_rxns`) are
+*not equivalent*. tINIT forces every essential reaction to carry `flux ≥ eps`. With
+Human-GEM's 113 task-essential reactions (the validation set), the resulting steady-state
+system is infeasible regardless of `eps`:
+
+| essentials passed to `run_init` | result |
+|---|---|
+| 0 (the original validation call) | ✅ ok, 6024 reactions |
+| 113 (merged-survivor IDs from `prep.essential_rxns`) | ❌ `infeasible` (proven by Gurobi presolve, ~330s) |
+| 260 (pre-merge IDs from `find_task_essential_reactions` cache) | ❌ `infeasible` (~480s) |
+
+Lowering `eps` (1.0 → 0.1) does **not** fix it; the issue is that 100+ reactions cannot
+simultaneously each carry a fixed positive flux in their forced direction at steady state.
+ftINIT avoids this by using an *adaptive* per-reaction forcing magnitude
+(`min(0.99·|previous flux|, force_on)`) so each essential is forced at a value it
+*actually carried* in a prior feasible solution. tINIT's one-size-fits-all `eps`
+mechanism doesn't have that escape hatch.
+
+**Practical takeaway.** For functional context-specific models on genome-scale data, use
+ftINIT — the task layer (gap-fill, adaptive essential forcing) is what makes the pipeline
+robust. tINIT remains useful for the small/no-essentials case (e.g. the
+expression-only baseline in the validation), but pairing it with the full task-essential
+set is a known incompatibility; the tINIT robustness study below is therefore reported
+with `essential_rxns=[]`.
+
+---
+
+## 2. Robustness to degraded transcriptomics (task layer always on)
+
+The metabolic-task + gap-fill layer is held fixed; only the expression input is degraded.
+`frac` = fraction of the 69 essential tasks the extracted model performs (`check_tasks`);
+`Jaccard` = reaction-set overlap with the clean-data model.
+
+| input | n_rxns | tasks pass | frac | Jaccard vs clean |
+|-------|-------:|-----------:|-----:|-----------------:|
+| **clean** | 7777 | 69/69 | 1.000 | ref |
+| dropout 50% | 5968 | 67/69 | 0.971 | **0.713** |
+| dropout 70% | 5113 | 68/69 | 0.986 | **0.594** |
+| noise σ=1.0 | 7812 | 69/69 | 1.000 | 0.952 |
+| noise σ=2.0 | 7768 | 69/69 | 1.000 | 0.919 |
+| downsample 50% | 6765 | 68/69 | 0.986 | 0.815 |
+| downsample 70% | 6123 | 68/69 | 0.986 | 0.728 |
+
+(dropout = genes set to 0 → score −5; noise = ×`exp(N(0,σ))`; downsample = genes dropped →
+`no_gene_score`.)
+
+**Findings:**
+
+* **Robust to noise, sensitive to sparsity.** Multiplicative expression noise barely changes
+  the model (Jaccard 0.92–0.95, size stable, all tasks pass). Sparsity is far more damaging:
+  50% dropout already drops the reaction set to **0.71 Jaccard** (and shrinks 7777→5968), 70%
+  to **0.59**.
+* **Sparsity shrinks the model toward the task-essential core.** Missing/zeroed genes remove
+  the expression evidence for a reaction; the task layer only adds back what tasks require, so
+  sparse input yields smaller, more "generic" models. Dropout (−5) is harsher than
+  downsampling (−2).
+* **Functionality is largely but not perfectly preserved.** With the task layer, `frac` stays
+  ≥0.97, but dips to 67–68/69 under heavy sparsity — i.e. the bounded gap-fill plus the
+  post-hoc low-score-gene pruning occasionally leave 1–2 essential tasks unsatisfied. (See the
+  lever sweep below for whether `no_gene_score`/`force_on` recover them.)
+* **Cost tracks damage.** Dropout runs are slowest (more broken tasks → more gap-fill);
+  noise is cheap.
+
+> **Tractability note (a parameter that prevents failure):** the gap-fill MILP must be bounded
+> (`mip_gap`/`time_limit`). Left unbounded, the gap-fill solves a hard min-cost MILP to proven
+> optimality for every broken task, and severe degradation breaks many tasks at once — one
+> 90%-dropout model was observed to run >75 min. With the bound it returns a near-optimal
+> fill quickly.
+
+### Levers at dropout 70% — which parameter best stabilises the model?
+
+| config | n_rxns | frac | Jaccard vs clean |
+|--------|-------:|-----:|-----------------:|
+| default (no_gene_score=−2, force_on=0.1) | 5113 | 0.986 | 0.594 |
+| no_gene_score=−1.0 | 5110 | 0.986 | 0.593 |
+| no_gene_score=−0.5 | 5128 | 0.986 | 0.593 |
+| force_on=0.2 | 5159 | 0.986 | 0.600 |
+
+**No lever recovers the drift** — Jaccard stays ~0.59 across all settings. Two reasons,
+both informative:
+
+* The information dropout destroys is simply gone; no scoring/connectivity knob reconstructs
+  the missing expression evidence. You cannot tune your way out of sparse input.
+* `no_gene_score` is the wrong knob *for dropout specifically*: dropout leaves genes
+  *present but zero* (scored −5), whereas `no_gene_score` only governs reactions whose genes
+  are **absent** from the data — i.e. the *downsampling* failure mode. So `no_gene_score` is
+  a meaningful lever for missing-data sparsity (a less-negative value keeps more
+  unmeasured reactions, growing the model back toward clean), but it has nothing to act on
+  under dropout.
+
+**Practical takeaway.** The robustness levers that matter are *structural*, not numeric: the
+task + gap-fill layer (keeps the model functional regardless of input quality) and a bounded
+gap-fill MILP (keeps it tractable). For *missing*-gene sparsity specifically, `no_gene_score`
+trades model size against confidence. For noise, defaults are already robust. No parameter
+restores fidelity lost to dropout — that is a property of the data, not the pipeline.
+
+### tINIT robustness — `essential_rxns=[]` (the tINIT-without-task-layer picture)
+
+For the reasons given in §1.7 (*tINIT + many task-essential reactions*), tINIT cannot
+accept the full task-essential set as forced reactions; this section runs
+`get_init_model` with `essential_rxns=[]` to show the realistic tINIT behaviour on the
+same degradation gradient — i.e. the *cost of not having ftINIT's gap-fill safety net*.
+
+| input | n_rxns | tasks pass | frac | Jaccard vs clean |
+|-------|-------:|-----------:|-----:|-----------------:|
+| **clean** | 6277 | **35/69** | **0.507** | ref |
+| dropout 50% | 4910 | 23/69 | 0.333 | 0.673 |
+| dropout 70% | 2807 | 21/69 | 0.304 | 0.408 |
+| noise σ=1.0 | 6661 | 25/69 | 0.362 | 0.878 |
+| noise σ=2.0 | 6146 | 24/69 | 0.348 | 0.869 |
+| downsample 50% | 5006 | 24/69 | 0.348 | 0.722 |
+| downsample 70% | 3541 | 19/69 | 0.275 | 0.515 |
+
+**The headline contrast with ftINIT:**
+
+| | ftINIT (task layer) | tINIT (no task layer) |
+|---|---|---|
+| clean | 7777 rxns, **69/69 (1.000)** | 6277 rxns, **35/69 (0.507)** |
+| dropout 0.7 | 5113 rxns, **68/69 (0.986)**, J 0.594 | 2807 rxns, **21/69 (0.304)**, J 0.408 |
+| noise σ=2.0 | 7768 rxns, **69/69 (1.000)**, J 0.919 | 6146 rxns, **24/69 (0.348)**, J 0.869 |
+| downsample 0.7 | 6123 rxns, **68/69 (0.986)**, J 0.728 | 3541 rxns, **19/69 (0.275)**, J 0.515 |
+
+* tINIT-without-gap-fill fails roughly **half the essential tasks even on clean data**;
+  ftINIT-with-gap-fill passes them all. Under degradation tINIT collapses further (down
+  to 19/69 at 70 % downsample), ftINIT stays ≥67/69 throughout.
+* **Reaction-set drift is comparable** under noise (Jaccard 0.87 vs 0.92) but worse for
+  tINIT under sparsity (0.41 vs 0.59 at 70 % dropout) because there's no gap-fill to
+  re-add structurally needed reactions.
+
+This is *not* a critique of the tINIT algorithm — classic INIT was designed for the
+no-task-layer case. It is the empirical evidence for why ftINIT's design choices (task
++ gap-fill, adaptive essential forcing) are the right ones for genome-scale tissue
+model extraction, and why tINIT is mostly useful here as a baseline.
+
+#### tINIT levers at dropout 70%
+
+| config | n_rxns | tasks pass | frac | Jaccard vs clean |
+|--------|-------:|-----------:|-----:|-----------------:|
+| default (prod_weight=0.5, eps=0.1) | 2807 | 21/69 | 0.304 | 0.408 |
+| prod_weight=0.0 | 2791 | 21/69 | 0.304 | 0.416 |
+| prod_weight=1.0 | 3386 | 22/69 | 0.319 | 0.485 |
+| prod_weight=2.0 | 3888 | 21/69 | 0.304 | 0.458 |
+| eps=0.5 | 2620 | 21/69 | 0.304 | 0.391 |
+| eps=1.0 | 3311 | 22/69 | 0.319 | 0.460 |
+
+Same conclusion as the ftINIT levers: parameter tuning can nudge (`prod_weight≥1.0`
+or a larger `eps` modestly grows the model and lifts Jaccard from 0.41 to ~0.48), but
+**no tINIT parameter recovers anything close to ftINIT's functionality** (22/69 at best
+vs ftINIT's 68/69 at the same dropout). The gap-fill layer, not the parameter
+choice, is what bridges the gap.
+
+---
+
+## 3. Cross-solver portability
+
+See [init_solver_benchmark.md](init_solver_benchmark.md) for the genome-scale
+solver comparison (Gurobi/HiGHS/GLPK) and [tests/test_init_solvers.py](../tests/test_init_solvers.py)
+for the CI tests parameterised over installed MILP backends. Headline: at genome scale only Gurobi
+is viable today; HiGHS fails on an upstream optlang `hybrid_interface.clone()` bug; GLPK
+ignores `configuration.timeout` on MIP and ran 1 h+ without converging. Toy-scale
+correctness is portable (Gurobi + GLPK give identical verdicts on the unit-test
+networks), so local development works without a Gurobi licence.
+
+---
+
+## Reproducing
+
+```bash
+python scripts/analyze_init_params.py    --cell HCT116 --sweeps ftinit_milp,prep_scale,tinit,ftinit_full
+python scripts/analyze_init_robustness.py --cell HCT116 --algo ftinit   # then --algo tinit
+```
+
+Both reuse the cached Human-GEM preps from the validation run
+([docs/humangem_validation.md](humangem_validation.md)) and are resumable.
diff --git a/docs/init_solver_benchmark.md b/docs/init_solver_benchmark.md
new file mode 100644
index 0000000..1cd97ac
--- /dev/null
+++ b/docs/init_solver_benchmark.md
@@ -0,0 +1,67 @@
+# Cross-solver ftINIT benchmark — Human-GEM / HCT116
+
+Same `ftinit()` call (no-task scaled prep; `mip_gap=0.001`, `time_limit=900s`) run with each
+installed MILP-capable optlang interface. Generated by `scripts/analyze_init_solvers.py`;
+companion to the CI-scale `tests/test_init_solvers.py`.
+
+## Per-solver result
+
+| solver | time (s) | status | n_rxns |
+|--------|---------:|--------|-------:|
+| **gurobi** | 518 | ✅ ok | 7752 |
+| **hybrid** (HiGHS) | 55 | ❌ FAIL: `ValueError: LP Method primal is not valid (choose one of: auto, simplex, interior point)` | 0 |
+| **glpk** | 3672 | ❌ FAIL: did not converge in 1 h+ (`configuration.timeout` not honored by GLPK MIP) | 0 |
+
+> Wall clocks on Gurobi 13.0.1, optlang 1.x, cobra; one Human-GEM HCT116 cell line.
+
+## Findings
+
+* **Gurobi** is the only MILP backend that actually completes ftINIT on Human-GEM here:
+  ~9 min for 7752 reactions (matches the [validation](humangem_validation.md) result).
+  All our tractability tuning (big-M=100, `rescaleModelForINIT`, `mip_gap`,
+  `time_limit`) was done on Gurobi and it pays off.
+* **HiGHS** (`hybrid_interface`) **does not work with cobra at all in this stack** — not
+  raven-python's bug. Cobra sets `model.solver = "hybrid"` which calls
+  `optlang.interface.Model.clone()`, which re-applies a stored `lp_method="primal"`
+  parameter that the `hybrid_interface.Configuration` rejects (it accepts only
+  `auto/simplex/interior point`). This breaks `model.copy()` and any flow that swaps
+  the solver — i.e. the whole pipeline. The same failure mode shows up at toy scale in
+  `tests/test_init_solvers.py` (5/5 fail), so CI catches it now. Upstream optlang/cobra
+  patch needed; nothing to fix in raven-python.
+* **GLPK** loads the model but its MIP solver does **not honor
+  `configuration.timeout`** for this problem — we set the 900 s wall limit, GLPK still
+  ran 1 h+ at 100 % CPU without producing a solution and had to be killed. GLPK has no
+  licensing burden but is not a viable MILP backend at genome scale for ftINIT in
+  practice.
+
+## Practical implications
+
+* **Production / genome-scale ftINIT requires Gurobi** today. We should be explicit
+  about this in the package docs (license-encumbered dependency) until either the
+  optlang `hybrid_interface` clone bug is fixed or GLPK gains usable MIP time-limit
+  support.
+* **Toy / unit-test correctness is portable.** `tests/test_init_solvers.py` shows Gurobi
+  and GLPK give identical verdicts on the toy ftINIT/tINIT networks; the formulation
+  itself is solver-independent. Local development and CI work without a Gurobi license;
+  only the genome-scale runs need it.
+* **Future portability work** is two concrete upstream fixes:
+  1. optlang `hybrid_interface.Configuration` should accept (or remap) the `lp_method`
+     parameter values that the generic clone path emits, or the clone path should drop
+     unknown LP-method values gracefully.
+  2. GLPK's MIP solve should honor `configuration.timeout`. If upstream won't,
+     raven-python could implement a watchdog (separate thread sending `SIGINT` after the
+     wall limit) specifically when the solver is GLPK.
+
+## Reproducing
+
+```bash
+# CI parameterised tests (seconds, runs always):
+python -m pytest tests/test_init_solvers.py -v
+
+# Genome-scale benchmark (minutes-to-hours, manual):
+python scripts/analyze_init_solvers.py --cell HCT116 \
+    --doc docs/init_solver_benchmark.md
+```
+
+Both reuse the cached Human-GEM no-task prep from the validation run
+([humangem_validation.md](humangem_validation.md)) and are resumable per solver.
diff --git a/docs/kegg_data_format.md b/docs/kegg_data_format.md
new file mode 100644
index 0000000..efb6d13
--- /dev/null
+++ b/docs/kegg_data_format.md
@@ -0,0 +1,72 @@
+# KEGG relational-table storage format
+
+This note records *why* raven-python stores its KEGG-derived relational tables as
+**compressed TSV** (gzip; xz for the one large table), and what other options we
+deliberately deferred. It applies to the maintainer-built KEGG artefacts described
+in PLAN.md §2.3b — the `ko_reaction`, `organism_gene_ko`, KO-name, and reaction-flag tables.
+
+The reference GEM itself is stored as **gzipped RAVEN/cobra YAML**
+(`reference_model.yml.gz`) — RAVEN-native and MATLAB-readable, gzipped to match the
+tables (the YAML I/O transparently gzips on a `.gz` suffix). On the real KEGG dump
+this is ~1.1 MB (vs ~30 MB as SBML) for the full 12k-reaction gene-free model.
+
+End users do not build any of this: the published artefacts are fetched and cached
+under `~/.cache/raven-python/data/kegg-<version>/` by `ensure_data` (see
+`raven_python.data`), mirroring how binaries are provisioned.
+
+## Decision (current)
+
+- **Small tables** (`ko_reaction`, `ko_names`, `rxn_flags`): **gzipped TSV
+  (`.tsv.gz`)**. Each is well under 1 MB, so compression choice is irrelevant;
+  gzip keeps them MATLAB-native and dependency-free.
+- **The large `organism_gene_ko` table**: **xz-compressed TSV
+  (`organism_gene_ko.tsv.xz`), with rows sorted by `(organism, gene)`**.
+
+Why the large table differs. It carries KEGG's ~9M gene↔KO associations and
+dominates the artefact set (≈78 MB as unsorted gzipped TSV). Two cheap,
+stdlib-only changes cut that to ≈27 MB (2.9×):
+
+1. **Sort by `(organism, gene)`** before writing. Gene IDs from one organism
+   share long common prefixes (locus tags, numeric runs); sorting makes them
+   adjacent so the compressor can fold them. This alone takes 78 → 48 MB and
+   happens to match the by-organism query pattern in
+   `get_kegg_model_for_organism`. The sort is an external merge sort bounded to
+   `chunk_rows` in memory (see `stream_organism_gene_ko`), so it stays scalable.
+2. **xz instead of gzip** (Python stdlib `lzma`). Its larger dictionary captures
+   cross-row redundancy gzip's 32 KB window misses: sorted + xz reaches ≈27 MB.
+
+- **pandas reads/writes both with zero extra dependencies** — compression is
+  inferred from the `.gz`/`.xz` suffix; `lzma` and `gzip` are both stdlib, so
+  this works natively on Windows, macOS, and Linux with no external binary.
+- **MATLAB caveat:** `readtable` reads gzipped TSV after a `gunzip`, but MATLAB
+  has no built-in xz decompressor. The small tables stay MATLAB-native; the
+  large table needs an external `unxz` (or Java/`7-Zip`) before `readtable` on
+  the MATLAB side. The xz file is raven-python's (Python) primary artefact; this
+  trades a little MATLAB convenience on the one big file for a ~3× size cut.
+
+## Options considered
+
+| Format | Python cost | MATLAB cost | Notes |
+| --- | --- | --- | --- |
+| **Compressed TSV** (gzip; xz for the large table) ✅ | none (stdlib/pandas) | none for gzip (`gunzip` + `readtable`); external `unxz` for the xz file | Universal, text, types re-specified on read. Chosen. |
+| Parquet | `pyarrow` or `fastparquet` (~40–60 MB wheel) as a `raven-python[kegg]` extra | needs ≥ R2019a (`parquetread`, native) | Smaller, faster, typed, columnar. Win mainly at scale / repeated random access. |
+| SQLite | none (stdlib `sqlite3`) | **needs Database Toolbox** | Rejected: the MATLAB-side toolbox requirement breaks the "same files, both languages, no extra deps" goal. |
+
+## When to revisit
+
+Reconsider Parquet (or SQLite) if any of these become true:
+
+- The `organism_gene_ko` table grows large enough that load *time* (not just
+  size — the sort+xz change above already addresses on-disk size) becomes a real
+  bottleneck. The remaining inefficiency is that building one species' model
+  still loads all ~9M rows; sorted order makes a `searchsorted`/row-group
+  by-organism read the natural next step before reaching for Parquet.
+- We start doing repeated random-access / columnar reads rather than a single
+  load-once-per-run pattern.
+- A typed, self-describing schema becomes valuable (TSV loses dtypes; they are
+  re-specified on read).
+
+If revisited, prefer **Parquet** over SQLite (no MATLAB toolbox dependency; MATLAB
+reads Parquet natively from R2019a). It could be offered as an optional
+`raven-python[kegg]` extra (pyarrow) alongside the TSV default, rather than replacing
+it — keeping the dependency-free path intact for users who don't opt in.
diff --git a/docs/kegg_hmm_cutoff_calibration.md b/docs/kegg_hmm_cutoff_calibration.md
new file mode 100644
index 0000000..43e3b3e
--- /dev/null
+++ b/docs/kegg_hmm_cutoff_calibration.md
@@ -0,0 +1,203 @@
+# KEGG HMM-query cut-off calibration
+
+This note records the measurements behind the default KO-assignment parameters in
+`reconstruction/kegg/query.py` (`assign_kos` / `get_kegg_model_from_sequences`,
+pipeline step 3b.5) and IMPROVEMENTS **K15**. It is the evidence for moving away
+from RAVEN's `1e-50` cut-off.
+
+## What the parameters do
+
+`assign_kos` turns an `hmmscan` KO×gene E-value matrix into gene→KO assignments
+in three steps:
+
+1. **`cutoff`** — keep hits with `evalue <= cutoff`.
+2. **`min_score_ratio_ko`** — within a KO, drop genes whose
+   `log(evalue)/log(best_evalue_in_KO) < min_score_ratio_ko`.
+3. **`min_score_ratio_g`** — within a gene, drop KOs whose
+   `log(evalue)/log(best_evalue_for_gene) < min_score_ratio_g`.
+
+## Method
+
+- **Data:** KEGG release 118. Libraries: the maintainer-built `prokaryotes.hmm`
+  (831 MB) and `eukaryotes.hmm` (692 MB), 90 %-clustered, FFT-NS-2/PartTree (K12).
+- **Queries:** each organism's full proteome, extracted from `genes.pep`.
+- **Ground truth:** the organism's *real* KEGG gene→KO links, from the
+  `organism_gene_ko` table (restricted, as the table is, to reaction-linked KOs).
+- **Prediction:** `assign_kos` output, with the `organism:` prefix stripped from
+  query gene IDs so they match the bare gene IDs in the ground truth.
+- **Metrics (gene→KO level):** precision = |pred ∩ truth| / |pred|,
+  recall = |pred ∩ truth| / |truth|, F1. Reaction-level: `rxn_rec` = fraction of
+  the organism's true reactions recovered (KO→reaction via `ko_reaction`);
+  `rxn_novel` = predicted reactions **not** in the annotation set.
+- Reproduce with [`scripts/analyze_hmm_cutoffs.py`](../scripts/analyze_hmm_cutoffs.py).
+
+### Important caveat
+
+All four organisms are **present in the libraries' training set**, so their own
+sequences hit their KO profiles strongly and recall is an upper bound. The
+calibration is therefore *relative* (how the parameters trade off, and where
+RAVEN's default sits relative to the signal), not an absolute accuracy estimate.
+A genome genuinely absent from KEGG would be the next validation. Also note that
+`rxn_novel` / "precision < 1" partly reflects **legitimate homology** KEGG never
+annotated for that organism (paralogs, un-curated genes), not pure error — so the
+precision figures are a lower bound on real precision.
+
+## Organisms
+
+| code | organism | library | proteome (seqs) | true gene→KO pairs | true reactions |
+|---|---|---|---|---|---|
+| `sce` | *Saccharomyces cerevisiae* (budding yeast) | euk | 6021 | 841 | 1217 |
+| `cme` | *Cyanidioschyzon merolae* (red alga) | euk | 5010 | 709 | 1157 |
+| `eco` | *Escherichia coli* K-12 MG1655 | prok | 4288 | 1071 | 1548 |
+| `mge` | *Mycoplasmoides genitalium* G37 (minimal genome) | prok | 476 | 110 | 211 |
+
+`sce`/`eco` are model organisms; `cme`/`mge` are lesser-studied, `mge`
+additionally being a small, divergent genome.
+
+## 1. E-value separation (the key result)
+
+`log10(E-value)` percentiles of the best hit per (gene, KO) pair, split by whether
+the pair is in the organism's annotation (**matched**) or not (**novel**). Smaller
+(more negative) = stronger hit.
+
+| organism | group | n | p50 | p90 | p95 | p99 |
+|---|---|---|---|---|---|---|
+| `sce` | matched | 835 | −155 | −75 | −59 | −33 |
+| `sce` | novel | 9467 | −8 | −2 | −0 | 1 |
+| `cme` | matched | 704 | −133 | −63 | −47 | −25 |
+| `cme` | novel | 10170 | −8 | −2 | −2 | 0 |
+| `eco` | matched | 1070 | −142 | −69 | −57 | −36 |
+| `eco` | novel | 27357 | −7 | −2 | −1 | −0 |
+| `mge` | matched | 110 | −100 | −42 | −35 | −15 |
+| `mge` | novel | 1904 | −4 | −2 | −1 | −0 |
+
+**Reading:** matched pairs cluster at E ≈ 1e-100…1e-155; even their weakest 1 %
+sit at 1e-15…1e-36. Novel pairs cluster at ≈1e-8. The two are separated by ~20
+orders of magnitude. RAVEN's **`1e-50` lies inside the *matched* tail** (between
+p95 and p99 for `sce`/`eco`, between p90 and p95 for `cme`, and already inside the
+p50–p90 range for `mge`), so it discards real-but-weakly-scoring annotations while
+gaining nothing against the (far weaker) noise.
+
+## 2. Cut-off sweep
+
+`min_score_ratio_ko = 0.3`, `min_score_ratio_g = 0.8` fixed; gene→KO precision /
+recall / F1 and reaction recovery vs the annotation.
+
+### `sce`
+| cutoff | gKO prec | gKO rec | gKO F1 | rxn rec | rxn novel |
+|---|---|---|---|---|---|
+| 1e-10 | 0.57 | 0.98 | 0.72 | 0.99 | 334 |
+| 1e-20 | 0.65 | 0.98 | 0.78 | 0.97 | 283 |
+| 1e-30 | 0.72 | 0.97 | 0.83 | 0.97 | 216 |
+| 1e-50 | 0.78 | 0.95 | 0.86 | 0.96 | 157 |
+| 1e-70 | 0.81 | 0.91 | 0.86 | 0.91 | 113 |
+| 1e-100 | 0.84 | 0.76 | 0.80 | 0.79 | 68 |
+
+### `cme`
+| cutoff | gKO prec | gKO rec | gKO F1 | rxn rec | rxn novel |
+|---|---|---|---|---|---|
+| 1e-10 | 0.50 | 0.98 | 0.67 | 1.00 | 541 |
+| 1e-20 | 0.57 | 0.98 | 0.72 | 1.00 | 421 |
+| 1e-30 | 0.61 | 0.97 | 0.75 | 0.97 | 367 |
+| 1e-50 | 0.70 | 0.93 | 0.80 | 0.94 | 307 |
+| 1e-70 | 0.75 | 0.85 | 0.80 | 0.87 | 223 |
+| 1e-100 | 0.80 | 0.70 | 0.75 | 0.71 | 136 |
+
+### `eco`
+| cutoff | gKO prec | gKO rec | gKO F1 | rxn rec | rxn novel |
+|---|---|---|---|---|---|
+| 1e-10 | 0.53 | 0.99 | 0.69 | 0.99 | 382 |
+| 1e-20 | 0.57 | 0.99 | 0.73 | 0.99 | 300 |
+| 1e-30 | 0.60 | 0.98 | 0.75 | 0.99 | 268 |
+| 1e-50 | 0.67 | 0.95 | 0.78 | 0.98 | 198 |
+| 1e-70 | 0.73 | 0.88 | 0.80 | 0.93 | 157 |
+| 1e-100 | 0.82 | 0.74 | 0.77 | 0.80 | 96 |
+
+### `mge`
+| cutoff | gKO prec | gKO rec | gKO F1 | rxn rec | rxn novel |
+|---|---|---|---|---|---|
+| 1e-10 | 0.52 | 0.98 | 0.68 | 0.99 | 75 |
+| 1e-20 | 0.62 | 0.96 | 0.75 | 0.98 | 51 |
+| 1e-30 | 0.65 | 0.95 | 0.77 | 0.98 | 39 |
+| 1e-50 | 0.77 | 0.84 | 0.80 | 0.87 | 29 |
+| 1e-70 | 0.85 | 0.73 | 0.78 | 0.73 | 27 |
+| 1e-100 | 0.87 | 0.50 | 0.64 | 0.47 | 21 |
+
+**Reading:** recall is flat-and-high from 1e-10 to ~1e-30, then falls as the
+cut-off eats into the matched tail — gently for model organisms, sharply for the
+divergent `mge` (rxn recall 0.98 → 0.87 from 1e-30 → 1e-50, → 0.47 at 1e-100).
+The recall lost to a stricter cut-off is *not* noise rejection (noise is at 1e-8);
+it is real annotation. `rxn_novel` shrinks with stricter cut-offs because strong
+un-annotated homologs are also removed.
+
+## 3. Score-ratio sweep (`cutoff = 1e-50`)
+
+| organism | ko ratio | g ratio | gKO prec | gKO rec | gKO F1 |
+|---|---|---|---|---|---|
+| `sce` | 0.0 | 0.50 | 0.61 | 0.96 | 0.74 |
+| `sce` | 0.0 | 0.80 | 0.77 | 0.95 | 0.85 |
+| `sce` | 0.0 | 0.95 | 0.84 | 0.93 | 0.88 |
+| `sce` | 0.3 | 0.80 | 0.78 | 0.95 | 0.86 |
+| `sce` | 0.5 | 0.80 | 0.80 | 0.95 | 0.86 |
+| `cme` | 0.0 | 0.50 | 0.53 | 0.94 | 0.68 |
+| `cme` | 0.0 | 0.80 | 0.69 | 0.93 | 0.79 |
+| `cme` | 0.0 | 0.95 | 0.78 | 0.92 | 0.84 |
+| `cme` | 0.3 | 0.80 | 0.70 | 0.93 | 0.80 |
+| `cme` | 0.5 | 0.80 | 0.70 | 0.93 | 0.80 |
+| `eco` | 0.0 | 0.50 | 0.39 | 0.96 | 0.56 |
+| `eco` | 0.0 | 0.80 | 0.66 | 0.95 | 0.78 |
+| `eco` | 0.0 | 0.95 | 0.76 | 0.94 | 0.84 |
+| `eco` | 0.3 | 0.80 | 0.67 | 0.95 | 0.78 |
+| `eco` | 0.5 | 0.80 | 0.69 | 0.95 | 0.80 |
+| `mge` | 0.0 | 0.50 | 0.62 | 0.85 | 0.72 |
+| `mge` | 0.0 | 0.80 | 0.77 | 0.84 | 0.80 |
+| `mge` | 0.0 | 0.95 | 0.82 | 0.81 | 0.81 |
+| `mge` | 0.3 | 0.80 | 0.77 | 0.84 | 0.80 |
+| `mge` | 0.5 | 0.80 | 0.78 | 0.84 | 0.81 |
+
+**Reading:**
+- **`min_score_ratio_ko` is inert** — across all four organisms, varying it
+  0.0 → 0.3 → 0.5 changes precision by ≤0.03 and leaves recall unchanged. It is a
+  magic-number knob that does effectively nothing here. (Full 0.0/0.3/0.5 × g-grid
+  in the script output; representative rows shown.)
+- **`min_score_ratio_g` is the real precision lever** — 0.80 → 0.95 lifts
+  precision ~0.05–0.10 for ~0.01–0.03 recall loss. 0.50 is clearly too loose.
+
+## 4. Chosen defaults and effect
+
+| parameter | RAVEN / old | new default | rationale |
+|---|---|---|---|
+| `cutoff` | 1e-50 | **1e-30** | recovers the matched tail (esp. divergent genomes); still ~22 orders of magnitude stricter than the 1e-8 noise floor |
+| `min_score_ratio_g` | 0.8 | **0.9** | the effective precision lever; offsets the looser cut-off |
+| `min_score_ratio_ko` | 0.3 | 0.3 (kept) | empirically inert; retained for RAVEN parity |
+
+Old default `(1e-50, 0.3, 0.8)` vs new default `(1e-30, 0.3, 0.9)`
+(`min_score_ratio_ko` 0.3 ≡ 0.0 here):
+
+| organism | gKO prec | gKO rec | rxn rec | rxn novel |
+|---|---|---|---|---|
+| `sce` | 0.78 → 0.76 | 0.95 → 0.96 | 0.96 → 0.96 | 157 → 137 |
+| `cme` | 0.70 → 0.67 | 0.93 → 0.96 | 0.94 → 0.97 | 307 → 305 |
+| `eco` | 0.67 → 0.67 | 0.95 → 0.97 | 0.98 → 0.99 | 198 → 173 |
+| `mge` | 0.77 → 0.69 | **0.84 → 0.94** | **0.87 → 0.97** | 29 → 35 |
+
+The divergent minimal genome gains ~10 points of recall (the case the sequence
+path exists for); model organisms improve slightly, and `sce` and `eco` emit *fewer*
+unannotated reactions (the tighter gene-ratio prunes spurious multi-KO genes). The
+small precision dip vs annotation is dominated by extra strong homologs, not
+weak-hit noise.
+
+## 5. Whole-model cross-validation (sanity check)
+
+Full reconstruction of *S. cerevisiae* two ways, at the old defaults:
+
+| | annotation path (3b.4) | HMM path (3b.5) |
+|---|---|---|
+| reactions | 1355 | 1461 |
+| metabolites | 1501 | 1567 |
+| genes | 835 | 896 |
+
+Reaction recall 96.3 % (1305/1355 shared, Jaccard 0.86); gene recall 96.6 %
+(807/835 shared, Jaccard 0.87). The annotation path also exercises the new
+`organism_gene_ko.tsv.xz` artefact (K14). `hmmscan` throughput ≈ 0.1 s/query
+against either library on 12 threads (yeast: 6021 queries in 633 s).
diff --git a/docs/maintaining_binaries.md b/docs/maintaining_binaries.md
new file mode 100644
index 0000000..df5b315
--- /dev/null
+++ b/docs/maintaining_binaries.md
@@ -0,0 +1,236 @@
+# Maintaining bundled binaries (BLAST+, DIAMOND, …)
+
+Audience: **raven-python maintainers / the GitHub repo owner.** This explains how
+raven-python ships external command-line tools, how to update their versions, and how
+to build **minimal-footprint** ZIPs to attach to a GitHub release.
+
+> End users never read this. They get a binary automatically via `ensure_binary`,
+> or use their own (system/conda) install. This doc is only for whoever publishes
+> the release assets.
+
+---
+
+## 1. How binary provisioning works
+
+raven-python does **not** vendor binaries in the git repo or on PyPI. Instead:
+
+1. For each tool we publish **version-pinned ZIPs as GitHub release assets**.
+2. A **registry** (`src/raven_python/binaries_registry.json`) maps each *bundle* to its
+   version, the executables it provides, and per-platform `{asset, url, sha256}`.
+3. At run time `raven_python.binaries.ensure_binary("blastp")` resolves a tool in this
+   order — and only reaches the download as a last resort:
+
+   ```
+   explicit binary= arg  →  env var (RAVEN_PYTHON_BLASTP / RAVEN_PYTHON_DIAMOND / …)
+     →  shutil.which on PATH (system / conda / apt / brew)
+     →  ensure_binary: download the pinned ZIP → verify SHA256 → cache → return path
+     →  actionable error (with conda / manual instructions)
+   ```
+
+So a pre-installed binary always wins; the bundle is the zero-setup fallback.
+Pinning the version makes reconstruction **reproducible**.
+
+A *bundle* can provide several executables from one download (e.g. the `blast`
+bundle provides both `blastp` and `makeblastdb`), so they are fetched once.
+
+---
+
+## 2. What raven-python actually needs — ship only these
+
+Distribute the **minimum** set of executables. Everything else (other suite
+tools, docs, examples, changelogs) must be excluded.
+
+| Bundle | Executables to include | Everything else |
+|---|---|---|
+| `diamond` | `diamond` | — (it is a single static binary) |
+| `blast` | `blastp`, `makeblastdb` | **drop** `blastn`, `tblastn`, `psiblast`, `rpsblast`, `blast_formatter`, `*_vdb`, the `doc/`, `ChangeLog`, `README`, ~30 other tools |
+
+(Confirmed against RAVEN `getBlast`/`getDiamond`: only `makeblastdb`+`blastp`, and
+`diamond` for its `makedb`/`blastp` subcommands, are ever invoked.)
+
+For BLAST+ this is the big win: the full NCBI suite is hundreds of MB, while the
+two stripped binaries are a small fraction of that.
+
+---
+
+## 3. Asset & ZIP conventions
+
+**Asset filename:** `<bundle>-<version>-<os>-<arch>.zip`
+
+- `<os>` ∈ `linux`, `macos`, `windows`
+- `<arch>` ∈ `x86_64`, `arm64`
+- examples: `diamond-2.1.11-linux-x86_64.zip`, `blast-2.16.0-macos-arm64.zip`
+
+**ZIP layout — flat, executables at the root, plus the upstream licence:**
+
+```
+diamond-2.1.11-linux-x86_64.zip
+├── diamond
+└── LICENSE
+
+blast-2.16.0-linux-x86_64.zip
+├── blastp
+├── makeblastdb
+└── LICENSE
+```
+
+No nested `bin/`, no extra files. `ensure_binary` extracts the ZIP into the cache
+and expects the executable at the top level.
+
+---
+
+## 4. Step-by-step: add or update a version
+
+Example: bump DIAMOND to a new version for Linux x86-64. Repeat per `(os, arch)`.
+
+1. **Download the official upstream build** (never rebuild from source unless you
+   must):
+   - DIAMOND → <https://github.com/bbuchfink/diamond/releases>
+     (`diamond-linux64.tar.gz`, `diamond-macos.tar.gz`)
+   - BLAST+ → <https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/LATEST/> or a
+     pinned version dir (`ncbi-blast-<ver>+-x64-linux.tar.gz`,
+     `-x64-macosx.tar.gz`, `-aarch64-linux.tar.gz`, `-x64-win64.tar.gz`).
+   - Record the upstream URL **and** its published checksum for provenance.
+2. **Extract only the needed executables** (see §2) to a clean staging dir.
+3. **Strip debug symbols** to shrink (skip on Windows / signed macOS builds):
+   ```bash
+   strip diamond           # or: strip blastp makeblastdb
+   ```
+4. **Smoke-test the stripped binaries in a clean shell** (no other tools on PATH):
+   ```bash
+   ./diamond --version
+   ./blastp -version && ./makeblastdb -version
+   ```
+   If they fail for a missing shared library, add that `.so`/`.dylib` to the ZIP
+   (rare — NCBI/DIAMOND release builds are largely self-contained).
+5. **Add the upstream licence file** as `LICENSE` (see §6).
+6. **Zip with max compression, flat layout:**
+   ```bash
+   zip -9 -j diamond-2.1.11-linux-x86_64.zip diamond LICENSE
+   # -j junks paths so entries sit at the ZIP root
+   ```
+7. **Compute the SHA256:**
+   ```bash
+   sha256sum diamond-2.1.11-linux-x86_64.zip   # shasum -a 256 on macOS
+   ```
+8. **Attach the ZIP to a raven-python GitHub release** (a release tagged for the binary
+   set, e.g. `binaries-2024.06`, keeps them independent of code releases).
+9. **Update the registry** `src/raven_python/binaries_registry.json` — bump `version`
+   and set the per-platform `asset`, `url` and `sha256`:
+   ```json
+   {
+     "diamond": {
+       "version": "2.1.11",
+       "provides": ["diamond"],
+       "platforms": {
+         "linux-x86_64": {
+           "asset": "diamond-2.1.11-linux-x86_64.zip",
+           "url": "https://github.com/SysBioChalmers/raven-python/releases/download/binaries-2024.06/diamond-2.1.11-linux-x86_64.zip",
+           "sha256": "<sha256>"
+         }
+       }
+     },
+     "blast": {
+       "version": "2.16.0",
+       "provides": ["blastp", "makeblastdb"],
+       "platforms": { "linux-x86_64": { "asset": "...", "url": "...", "sha256": "..." } }
+     }
+   }
+   ```
+10. **Commit the registry change**, run the homology tests, and (if you have the
+    binary) confirm `ensure_binary("diamond", version="2.1.11")` downloads,
+    verifies, and runs.
+
+---
+
+## 5. Keeping the footprint minimal — checklist
+
+- ✅ Only the executables in §2 (for BLAST+, exactly `blastp` + `makeblastdb`).
+- ✅ `strip` the binaries (often halves their size).
+- ✅ `zip -9 -j` (max compression, flat — no `bin/`, no folders).
+- ✅ Exactly one extra file: `LICENSE`.
+- ❌ No docs, examples, `ChangeLog`, `README`, man pages, test data, or sibling tools.
+- ❌ No `.dSYM`/debug bundles; no duplicate static `.a` libraries.
+- ➕ Only add a shared library if step-4 testing proves it is required.
+
+---
+
+## 6. Platform / architecture matrix & licensing
+
+**Coverage = what you build.** Start with `linux-x86_64` (CI default), then add
+`macos-arm64`, `macos-x86_64`, `linux-arm64`, `windows-x86_64` as capacity allows.
+For any `(os, arch)` **not** in the registry, `ensure_binary` raises an actionable
+error pointing to conda (`conda install -c bioconda diamond blast`) or a manual
+install — that is the documented fallback, not a failure to fix urgently.
+
+**Licensing (must comply when redistributing):**
+
+- **BLAST+** — produced by NCBI (US Government); **public domain**, free to
+  redistribute. Include NCBI's `LICENSE` for courtesy/provenance.
+- **DIAMOND** — **GPLv3**. Redistribution is allowed; you **must** include the
+  GPLv3 licence text in the ZIP and keep the binary unmodified (or offer source).
+- **HMMER** (future) — BSD-3-Clause; include its `LICENSE`.
+
+Always ship the upstream licence in the ZIP, and keep a `BINARIES_PROVENANCE.md`
+(or a note in the release body) recording, per asset: upstream URL, upstream
+version, upstream checksum, and the SHA256 you published.
+
+### Native OS support per tool
+
+raven-python invokes each tool through `subprocess.run([resolved_path, …])` — that
+call is itself cross-platform, so the real constraint is whether a given tool has
+a binary that runs natively on each OS. It varies:
+
+| Tool | Linux | macOS (incl. arm64) | Windows (native) |
+|---|---|---|---|
+| BLAST+ (`blastp`, `makeblastdb`) | ✅ | ✅ | ✅ (NCBI ships Windows builds) |
+| DIAMOND | ✅ | ✅ | ⚠️ native build exists but Linux-first |
+| HMMER (`hmmbuild`/`hmmpress`/`hmmsearch`/`hmmscan`) | ✅ | ✅ | ❌ no official native build |
+| MAFFT | ✅ | ✅ | ⚠️ Windows package is a wrapper |
+| CD-HIT | ✅ | ✅ | ❌ no Windows build exists |
+
+Implications:
+
+- **Linux / macOS** — everything works. `conda install -c bioconda hmmer mafft
+  cd-hit blast diamond`, or point the `RAVEN_PYTHON_*` env vars at your installs.
+- **Native Windows** — the homology track (BLAST+/DIAMOND) works, but the **KEGG
+  HMM build (3b.3) and HMM query (3b.5) do not**: HMMER and CD-HIT have no Windows
+  binaries, and bioconda has no Windows packages for any of them. Bundling can't
+  fix this — there is no binary to bundle.
+- **Windows users should run raven-python inside WSL2** (or a Linux container), where
+  every tool is native Linux. raven-python does **not** replicate RAVEN's
+  `getWSLpath`/`wsl …` path translation: it calls the resolved binary directly, so
+  mixing native-Windows Python with WSL binaries is unsupported — keep the whole
+  stack inside WSL2.
+- The common end-user paths — homology reconstruction and the KEGG *species* model
+  (3b.4) — need no HMMER/MAFFT/CD-HIT, so they are fully cross-platform.
+
+---
+
+## 7. Emitting the registry entry
+
+After building the per-platform ZIPs (named `<bundle>-<version>-<os>-<arch>.zip`)
+and uploading them to the release, generate the `_REGISTRY` entry — checksums and
+URLs — with [`scripts/make_registry_snippet.py`](../scripts/README.md):
+
+```bash
+python scripts/make_registry_snippet.py binary --bundle blast --version 2.16.0 \
+    --provides blastp makeblastdb --dir zips \
+    --base-url https://github.com/ORG/raven-python/releases/download/blast-2.16.0
+```
+
+It prints the ready-to-paste `_REGISTRY["blast"]` block; its SHA256 helper is the
+same one `ensure_binary` verifies with, so the checksums always match. (Producing
+the minimal ZIPs themselves — download upstream, `strip`, `zip -9 -j`, add
+`LICENSE` per §3–§6 — is still a manual/per-tool step.)
+
+---
+
+## 8. Adding a new tool later (e.g. HMMER for KEGG reconstruction)
+
+1. Decide the **minimal executable set** (e.g. HMMER → `hmmsearch`, `hmmscan`,
+   maybe `hmmbuild`/`hmmpress`).
+2. Add a bundle entry to the registry with `provides` listing those executables.
+3. Build/attach ZIPs per §3–§4; include the tool's licence (§6).
+4. The wrappers call `ensure_binary("hmmsearch", …)` with the same resolution
+   order — no new provisioning code needed.
diff --git a/docs/maintaining_kegg_data.md b/docs/maintaining_kegg_data.md
new file mode 100644
index 0000000..f53d0da
--- /dev/null
+++ b/docs/maintaining_kegg_data.md
@@ -0,0 +1,157 @@
+# Maintaining the KEGG data artefacts
+
+This guide is for the **package maintainer** who rebuilds raven-python's KEGG
+artefacts once per KEGG release. End users never do this — they download the
+published, version-pinned artefacts. The build has three steps: **3b.1 download**
+(`reconstruction/kegg/download.py`), **3b.2 parse** (`reconstruction/kegg/parse.py`)
+and **3b.3 HMM-library build** (`build_hmm_library`); see PLAN.md §2.3b for the full pipeline.
+
+## Prerequisites
+
+### A paid KEGG FTP subscription
+The bulk KEGG dump is licensed. You need an active subscription to
+`ftp.kegg.net`, which gives you a **username and password**.
+
+### Credentials in `~/.netrc`
+The download reads your KEGG username and password from a `~/.netrc` file — it
+never takes them on the command line, so they stay out of your shell history and
+out of `ps` output. Create the file (readable only by you) and add a `machine`
+line for the KEGG host:
+
+```bash
+touch ~/.netrc && chmod 600 ~/.netrc
+```
+
+Then add this single line to `~/.netrc`, substituting your subscription
+credentials:
+
+```
+machine ftp.kegg.net login YOUR_KEGG_USER password YOUR_KEGG_PASSWORD
+```
+
+Notes:
+- The host **must be `ftp.kegg.net`** — that is the machine name the downloader
+  looks up. A `machine` line for any other host is ignored.
+- The file **must be mode `600`** (owner read/write only). Python's `netrc`
+  parser refuses a `.netrc` that other users can read.
+- `~/.netrc` is the same convention `curl`, `wget` and `git` use, so if you
+  already have one, just add the `ftp.kegg.net` line to it.
+
+If you keep secrets somewhere other than `$HOME`, point the downloader at a
+different file with `netrc_path=...` (see below); the format is identical.
+
+## Step 3b.1 — download and arrange the dump
+
+With `~/.netrc` in place, no credentials need to be passed in code:
+
+```python
+from raven_python.reconstruction.kegg import download_kegg_dump
+
+# Reads ~/.netrc, fetches the KEGG archives, extracts and arranges them.
+download_kegg_dump("keggdb")
+```
+
+This fetches the reaction / compound / glycan / ko archives, the eukaryote and
+prokaryote proteomes, and the taxonomy file; extracts them; and arranges the
+flat layout the parser expects (`reaction`, `reaction.lst`,
+`reaction_mapformula.lst`, `compound` = compound + glycan, `compound.inchi`,
+`ko`, `genes.pep` = both proteomes, `taxonomy`).
+
+Credential alternatives:
+
+```python
+# A .netrc in a non-default location:
+download_kegg_dump("keggdb", netrc_path="/run/secrets/kegg_netrc")
+
+# Pass credentials explicitly (only when they come from a secret manager at
+# runtime — never hardcode literals in committed code):
+download_kegg_dump("keggdb", auth=("YOUR_KEGG_USER", "YOUR_KEGG_PASSWORD"))
+```
+
+Already-downloaded files are skipped; pass `force=True` to re-fetch (for a new
+KEGG release).
+
+## Step 3b.2 — parse into the published artefacts
+
+```python
+from raven_python.reconstruction.kegg import parse_kegg_dump
+
+parse_kegg_dump("keggdb", "artefacts")
+```
+
+This writes the gene-free reference model (`reference_model.yml.gz`, gzipped
+RAVEN/cobra YAML) and the relational tables as compressed TSV (`.tsv.gz`, except the
+large `organism_gene_ko.tsv.xz`). See [kegg_data_format.md](kegg_data_format.md) for
+what those tables contain and the format rationale.
+
+## Step 3b.3 — build the HMM libraries
+
+Build the per-domain profile-HMM libraries that the de-novo query path (3b.5)
+searches. This needs **HMMER** (`hmmbuild`, `hmmpress`), **MAFFT**, and
+**CD-HIT** on `PATH` (or set `RAVEN_PYTHON_HMMBUILD` / `RAVEN_PYTHON_MAFFT` /
+`RAVEN_PYTHON_CDHIT`, etc.); install e.g. `conda install -c bioconda hmmer mafft cd-hit`.
+
+> **OS note:** these three tools run on Linux and macOS but **not native
+> Windows** — on Windows, run this step inside WSL2. See the native-OS-support
+> matrix in [maintaining_binaries.md](maintaining_binaries.md#native-os-support-per-tool).
+
+```python
+from raven_python.reconstruction.kegg import build_hmm_library, read_kegg_table
+
+organism_gene_ko = read_kegg_table("artefacts/organism_gene_ko.tsv.xz")
+for domain in ("prokaryotes", "eukaryotes"):
+    build_hmm_library(
+        organism_gene_ko,
+        "keggdb/genes.pep",      # proteomes from 3b.1
+        "keggdb/taxonomy",       # domain split, from 3b.1
+        f"hmms/{domain}",
+        domain=domain,
+    )
+```
+
+For each KO in the domain it gathers the member sequences, dereplicates with
+CD-HIT (~90 % identity), aligns with MAFFT, trains a profile with `hmmbuild`, and
+finally concatenates and `hmmpress`-es them into a single `library.hmm` for fast
+`hmmscan` querying. This is the slowest step (hours, once per KEGG release); it
+skips KOs whose `.hmm` already exists, so it is resumable. The resulting
+libraries are published as version-pinned artefacts alongside the reference model
+and tables.
+
+## Building and publishing in one go
+
+[`scripts/build_kegg_artefacts.py`](../scripts/README.md) runs 3b.2 (+ 3b.3 with
+`--hmms`) and lays the output out as publishable assets (`<domain>.hmm` named for
+`ensure_kegg_hmm_library`):
+
+```bash
+python scripts/build_kegg_artefacts.py --keggdb keggdb --out artefacts --hmms --threads 8
+```
+
+Upload the contents of `artefacts/` to a release, then emit the registry entry for
+`raven_python.data._DATA_REGISTRY` with [`scripts/make_registry_snippet.py`](../scripts/README.md):
+
+```bash
+python scripts/make_registry_snippet.py data --dataset kegg --version kegg116 \
+    --dir artefacts --base-url https://github.com/ORG/raven-python/releases/download/kegg-data-kegg116
+```
+
+Paste the printed block into `_DATA_REGISTRY`; from then on `ensure_data` fetches
+and verifies the artefacts for end users automatically.
+
+## End-user paths (3b.4 / 3b.5)
+
+End users do **not** run the steps above — the published artefacts are fetched and
+cached automatically by `ensure_data` (`raven_python.data`) under
+`~/.cache/raven-python/data/kegg-<version>/` on first use, so the entry points below
+can be called with no local paths at all (pass an explicit `artefact_dir=`/
+`library=` to use your own build instead). Two runtime entry points build a draft
+model from the artefacts:
+
+- **3b.4 — species in KEGG** (`get_kegg_model_for_organism_from_artefacts`): no
+  binaries needed; uses the organism's KEGG gene↔KO annotations. Fully
+  cross-platform. `organism_id="prokaryotes"`/`"eukaryotes"` builds a whole-domain
+  model (pass `taxonomy=`).
+- **3b.5 — organism not in KEGG** (`get_kegg_model_from_sequences`): `hmmscan`-es a
+  proteome FASTA against the pressed `library.hmm`, so it needs **HMMER**
+  (`hmmscan`) — Linux/macOS or WSL2 (see the OS matrix). Tune assignment with
+  `cutoff`, `min_score_ratio_ko`, `min_score_ratio_g`.
diff --git a/docs/yeast_localization_benchmark.md b/docs/yeast_localization_benchmark.md
new file mode 100644
index 0000000..de75566
--- /dev/null
+++ b/docs/yeast_localization_benchmark.md
@@ -0,0 +1,148 @@
+# yeast-GEM localisation benchmark
+
+Real-data validation of [`localization.predict_localization`](../src/raven_python/localization/predict.py)
+on the curated yeast-GEM. The benchmark is end-to-end — model, scoring, MILP — and
+sweeps predictor noise so the failure modes are visible, not just the headline accuracy.
+
+* Driver: [`scripts/benchmark_localization_yeast.py`](../scripts/benchmark_localization_yeast.py)
+* Yeast-GEM source: `pcSecYeastSpecies/Model/yeastGEM.xml` (3991 reactions, 1147 genes,
+  14 compartments).
+* Run command:
+  ```bash
+  python scripts/benchmark_localization_yeast.py \
+      --yeast-gem ~/github/pcSecYeastSpecies/Model/yeastGEM.xml \
+      --noise 0,0.1,0.25,0.5 \
+      --max-reactions 300 \
+      --transport-cost 0.05 \
+      --time-limit 300 \
+      --doc docs/yeast_localization_benchmark.md
+  ```
+
+## Setup
+
+1. **Truth set**: every yeast-GEM reaction that (a) has a GPR, (b) is non-boundary,
+   and (c) has all metabolites in the same compartment. 2 155 reactions qualify;
+   stratified subsampling to 298 keeps the per-compartment distribution. The 14
+   compartments collapse to 12 placement targets in the truth set (extracellular and
+   the lipid particle / vacuolar membrane variants stay distinct).
+2. **Flattening**: the model is squashed into one compartment with
+   [`manipulation.merge_compartments`](../src/raven_python/manipulation/compartments.py)
+   so the predictor cannot lean on metabolite-topology evidence. Without this step
+   every GPR'd reaction's "predicted" compartment is just its current one — vacuous.
+3. **Reference scores**: each gene gets `1.0` in every compartment that hosts one of
+   its reactions in the original (multi-compartment) model. This is the
+   *perfect-predictor* upper bound; real WoLF PSORT / DeepLoc output will be noisier.
+4. **Noise injection**: at noise level `p` each gene independently has probability
+   `p` of having a confidently *wrong* compartment grafted in as the new top score
+   (the true compartment is demoted to half its score). This simulates a predictor
+   that's right `1-p` of the time and confidently wrong otherwise — a more
+   pessimistic stand-in than uniform Gaussian jitter.
+5. **MILP**: `transport_cost=0.05`, `multi_compartment_penalty=0.5`, `mip_gap=0.01`,
+   `time_limit=300s`, Gurobi. The MILP has 7 982 binaries, 2 691 rows, 29 842
+   nonzeros at this scale — solves in 30–50 s.
+
+## Accuracy vs. predictor noise
+
+Accuracy = fraction of relocated reactions that the MILP places back in the truth
+compartment.
+
+| noise | seconds | n_total | n_correct | n_unplaced | accuracy |
+|------:|--------:|--------:|----------:|-----------:|---------:|
+| 0.00  | 46      | 298     | 213       | 0          | 0.715    |
+| 0.10  | 34      | 298     | 199       | 0          | 0.668    |
+| 0.25  | 41      | 298     | 175       | 0          | 0.587    |
+| 0.50  | 31      | 298     | 115       | 0          | 0.386    |
+
+Monotone degradation, no MILP infeasibilities at any noise level. At 10 % confident
+mis-scoring the accuracy drops only ~4.7 pp — the algorithm largely shrugs off small
+predictor noise because each compartment's evidence is the sum of all its genes'
+scores, so a few wrong genes get out-voted by their neighbours. At 50 % the algorithm
+is still better than the 1/12 = 8.3 % uniform baseline, but the loss is steep.
+
+## Confusion matrix at noise=0.00
+
+Rows = curated (true) compartment; columns = predicted. The `c` column dominates
+because cytosolic genes are also active in many other compartments (so an mm-only
+reaction shares its genes with cytosolic reactions, and the algorithm picks `c`).
+
+| true \ pred | c   | ce | er | erm | g | gm | lp | m  | mm | p  | v | vm |
+|---|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|
+| **c**   | 91 | 0  | 0  | 0   | 0 | 0  | 0  | 0  | 0  | 0  | 0 | 0  |
+| **ce**  | 4  | 11 | 0  | 0   | 0 | 0  | 0  | 0  | 0  | 0  | 0 | 0  |
+| **e**   | 1  | 0  | 0  | 0   | 0 | 0  | 0  | 0  | 0  | 0  | 0 | 0  |
+| **er**  | 6  | 0  | 7  | 0   | 0 | 0  | 0  | 0  | 0  | 0  | 0 | 0  |
+| **erm** | 18 | 0  | 0  | 30  | 0 | 0  | 0  | 0  | 0  | 0  | 0 | 0  |
+| **g**   | 0  | 0  | 0  | 0   | 2 | 0  | 0  | 0  | 0  | 0  | 0 | 0  |
+| **gm**  | 4  | 0  | 0  | 0   | 0 | 3  | 0  | 0  | 0  | 0  | 0 | 0  |
+| **lp**  | 0  | 0  | 0  | 0   | 0 | 0  | 21 | 0  | 0  | 0  | 0 | 0  |
+| **m**   | 8  | 0  | 0  | 0   | 0 | 0  | 0  | 20 | 0  | 0  | 0 | 0  |
+| **mm**  | 38 | 0  | 0  | 0   | 0 | 0  | 0  | 0  | 3  | 0  | 0 | 0  |
+| **n**   | 3  | 0  | 0  | 0   | 0 | 3  | 0  | 0  | 0  | 0  | 0 | 0  |
+| **p**   | 0  | 0  | 0  | 0   | 0 | 0  | 0  | 0  | 0  | 16 | 0 | 0  |
+| **v**   | 0  | 0  | 0  | 0   | 0 | 0  | 0  | 0  | 0  | 0  | 1 | 0  |
+| **vm**  | 0  | 0  | 0  | 0   | 0 | 0  | 0  | 0  | 0  | 0  | 0 | 8  |
+
+## Per-compartment accuracy at noise=0.00
+
+| compartment | n  | n_correct | accuracy |
+|---|---:|---:|---:|
+| c   | 91 | 91 | 1.000 |
+| ce  | 15 | 11 | 0.733 |
+| e   | 1  | 0  | 0.000 |
+| er  | 13 | 7  | 0.538 |
+| erm | 48 | 30 | 0.625 |
+| g   | 2  | 2  | 1.000 |
+| gm  | 7  | 3  | 0.429 |
+| lp  | 21 | 21 | 1.000 |
+| m   | 28 | 20 | 0.714 |
+| mm  | 41 | 3  | 0.073 |
+| n   | 6  | 0  | 0.000 |
+| p   | 16 | 16 | 1.000 |
+| v   | 1  | 1  | 1.000 |
+| vm  | 8  | 8  | 1.000 |
+
+## What the failures mean
+
+* **Compartments with self-contained gene sets are perfect.** `c`, `g`, `lp`, `p`,
+  `v`, `vm` reach 100 %: apart from `c` itself, their gene sets are largely disjoint
+  from `c`'s, so the per-compartment evidence sum picks them cleanly.
+* **Inner-membrane vs. matrix collapses to cytosol.** The mitochondrial inner
+  membrane (`mm`, 41 reactions, 7.3 % correct) and nucleus (`n`, 6 reactions, 0 %
+  correct) lose to `c` because their genes are *also* annotated to cytosolic
+  reactions. The algorithm sees gene `X` with score `1.0` in both `c` and `mm`,
+  and `c` wins on the larger pool of co-localised reactions. This is faithful to
+  the predictor evidence — a real WoLF PSORT / DeepLoc score table that distinguishes
+  inner-membrane from matrix would do better here, but the
+  derive-scores-from-the-model harness can't see that distinction.
+* **Membrane / non-membrane pairs split correctly.** `erm` vs `er`, `gm` vs `g` — the
+  algorithm prefers the membrane sub-compartment when its genes are more membrane-typical,
+  which the score harness reproduces. 43–63 % (`gm`, `er`, `erm`) is honest signal.
+* **No MILP infeasibilities.** Even at 50 % confident mis-scoring every reaction
+  gets placed (the unplaced column stays 0).
+
+## Calibration insight: `transport_cost` matters
+
+The first smoke run used `transport_cost=0.5` (the default) and dumped almost every
+reaction into `c`. With ~5 metabolites per reaction the per-reaction transport bill
+overwhelmed the unit-scale gene reward, so the optimiser's best move was always
+"keep it in the default compartment, pay no transports." Dropping to
+`transport_cost=0.05` restored the per-compartment signal. For a real predictor with
+typical score magnitudes ≪ 1, the user should expect to dial `transport_cost` *down*
+into the same per-metabolite-per-compartment range as the typical gene-score-delta —
+the doc-string default of 0.5 is sized for clean integer-style scores, not
+soft-probability output.
+
+## Reproducing
+
+Run a fast smoke test (subsampled, small noise grid):
+```bash
+python scripts/benchmark_localization_yeast.py \
+    --noise 0,0.25 --max-reactions 100 --time-limit 60
+```
+
+Plug in a real predictor (CSV of `gene_id` + one column per compartment):
+```bash
+python scripts/benchmark_localization_yeast.py \
+    --scores-csv my_deeploc_yeast.csv \
+    --noise 0 --max-reactions 300
+```
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..faeeb1f
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,84 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "raven-python"
+version = "0.0.1"
+description = "Reconstruction, Analysis and Visualization of Metabolic Networks in Python, a port of the RAVEN Toolbox built on cobrapy"
+readme = "README.md"
+requires-python = ">=3.11"
+license = { text = "GPL-3.0-or-later" }
+authors = [
+    { name = "Eduard Kerkhoven", email = "eduardk@chalmers.se" },
+]
+keywords = [
+    "genome-scale-model",
+    "metabolic-model",
+    "reconstruction",
+    "raven",
+    "cobra",
+    "systems-biology",
+    "constraint-based-modeling",
+    "kegg",
+    "metacyc",
+    "tinit",
+]
+classifiers = [
+    "Development Status :: 2 - Pre-Alpha",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+    "Topic :: Scientific/Engineering :: Bio-Informatics",
+]
+dependencies = [
+    "cobra>=0.29",
+    "numpy>=1.21",
+    "pandas>=2",
+    "scipy>=1.10",
+    "ruamel.yaml>=0.17",
+    "requests>=2.28",
+    "tqdm>=4.65",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest>=7",
+    "pytest-cov",
+    "ruff>=0.4",
+]
+excel = [
+    "openpyxl>=3.1",
+]
+plotting = [
+    "matplotlib>=3.5",
+]
+
+[project.urls]
+Homepage = "https://github.com/SysBioChalmers/raven-python"
+Source = "https://github.com/SysBioChalmers/raven-python"
+Issues = "https://github.com/SysBioChalmers/raven-python/issues"
+"RAVEN MATLAB" = "https://github.com/SysBioChalmers/RAVEN"
+
+[tool.hatch.build.targets.wheel]
+packages = ["src/raven_python"]
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+
+[tool.ruff]
+line-length = 100
+target-version = "py311"
+
+[tool.ruff.lint]
+select = ["E", "F", "W", "I", "UP", "B"]
+ignore = [
+    "E501",  # line length handled by the formatter
+]
+
+[tool.ruff.lint.per-file-ignores]
+"tests/*" = ["E402"]
diff --git a/scripts/README.md b/scripts/README.md
new file mode 100644
index 0000000..67a3b36
--- /dev/null
+++ b/scripts/README.md
@@ -0,0 +1,35 @@
+# Maintainer scripts
+
+Release-time tooling. Not part of the installed package — run them from a checkout
+with raven-python installed (`pip install -e .`). End users never need these.
+
+## `build_kegg_artefacts.py`
+
+Build the publishable KEGG artefact set from an arranged KEGG dump (see
+`download_kegg_dump`): the gzipped-YAML reference model, the gzipped-TSV tables,
+and (with `--hmms`) the per-domain pressed HMM libraries. Output is laid out ready
+to upload as release assets. See [docs/maintaining_kegg_data.md](../docs/maintaining_kegg_data.md).
+
+```bash
+python scripts/build_kegg_artefacts.py --keggdb keggdb --out artefacts          # tables + model
+python scripts/build_kegg_artefacts.py --keggdb keggdb --out artefacts --hmms --threads 8
+```
+
+## `make_registry_snippet.py`
+
+After uploading the files to a release, compute their SHA256 and print the entry
+to merge into the runtime registry — `raven_python.data._DATA_REGISTRY` (data) or
+`raven_python.binaries._REGISTRY` (binary ZIP bundles). The checksum helper is shared
+with the resolvers, so published checksums always match what `ensure_data` /
+`ensure_binary` verify.
+
+```bash
+# Data artefacts:
+python scripts/make_registry_snippet.py data --dataset kegg --version kegg116 \
+    --dir artefacts --base-url https://github.com/ORG/raven-python/releases/download/kegg-data-kegg116
+
+# Binary bundle (ZIPs named <bundle>-<version>-<os>-<arch>.zip):
+python scripts/make_registry_snippet.py binary --bundle blast --version 2.16.0 \
+    --provides blastp makeblastdb --dir zips \
+    --base-url https://github.com/ORG/raven-python/releases/download/blast-2.16.0
+```
diff --git a/scripts/analyze_hmm_cutoffs.py b/scripts/analyze_hmm_cutoffs.py
new file mode 100644
index 0000000..654fc02
--- /dev/null
+++ b/scripts/analyze_hmm_cutoffs.py
@@ -0,0 +1,143 @@
+#!/usr/bin/env python3
+"""Cut-off sensitivity for the KEGG HMM query path (step 3b.5).
+
+Cross-validates ``assign_kos`` against an organism's *real* KEGG gene→KO
+annotation (from the ``organism_gene_ko`` table) and sweeps the E-value cut-off
+and the two score-ratio filters. Produces the tables in
+``docs/kegg_hmm_cutoff_calibration.md``.
+
+Usage
+-----
+    python scripts/analyze_hmm_cutoffs.py \
+        --artefacts ~/keggdb_artefacts \
+        --proteome /path/to/org.pep \
+        --org sce --library ~/keggdb_artefacts/eukaryotes.hmm
+
+``--proteome`` is the organism's protein FASTA (headers ``>org:gene ...``, e.g.
+extracted from KEGG ``genes.pep``). ``--tblout`` may be given instead of
+``--library`` to reuse a cached ``hmmscan --tblout`` file. Requires ``hmmscan``
+on PATH or via ``RAVEN_PYTHON_HMMER`` when ``--library`` is used.
+
+Caveat: organisms present in the library's training set give an upper bound on
+recall; the comparison is relative (see the doc).
+"""
+from __future__ import annotations
+
+import argparse
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+
+from raven_python.reconstruction.kegg.parse import read_kegg_table
+from raven_python.reconstruction.kegg.query import (
+    assign_kos,
+    parse_hmmscan_tblout,
+    run_hmmscan,
+)
+
+CUTOFFS = (1e-10, 1e-20, 1e-30, 1e-50, 1e-70, 1e-100)
+KO_RATIOS = (0.0, 0.3, 0.5)
+G_RATIOS = (0.5, 0.8, 0.95)
+
+
+def load_ko2rxn(artefacts: Path) -> dict[str, set[str]]:
+    tbl = read_kegg_table(artefacts / "ko_reaction.tsv.gz")
+    ko2rxn: dict[str, set[str]] = {}
+    for ko, rxn in zip(tbl["ko"], tbl["reaction"], strict=True):
+        ko2rxn.setdefault(ko, set()).add(rxn)
+    return ko2rxn
+
+
+def ground_truth(artefacts: Path, org: str, ko2rxn) -> tuple[set, set]:
+    ogk = read_kegg_table(artefacts / "organism_gene_ko.tsv.xz")
+    rows = ogk[ogk["organism"].str.lower() == org]
+    pairs = set(zip(rows["gene"], rows["ko"], strict=True))
+    rxns = {r for _, ko in pairs for r in ko2rxn.get(ko, ())}
+    return pairs, rxns
+
+
+def predicted_pairs(hits: pd.DataFrame, **kw) -> set:
+    out = set()
+    for ko, genes in assign_kos(hits, **kw).items():
+        for g in genes:
+            out.add((g.split(":", 1)[1] if ":" in g else g, ko))
+    return out
+
+
+def prf(pred: set, truth: set) -> tuple[float, float, float]:
+    tp = len(pred & truth)
+    rec = tp / len(truth) if truth else 0.0
+    prec = tp / len(pred) if pred else 0.0
+    f1 = 2 * prec * rec / (prec + rec) if prec + rec else 0.0
+    return prec, rec, f1
+
+
+def main(argv=None) -> None:
+    ap = argparse.ArgumentParser(description=__doc__)
+    ap.add_argument("--artefacts", type=Path, required=True)
+    ap.add_argument("--org", required=True, help="KEGG organism code, e.g. sce")
+    ap.add_argument("--proteome", type=Path, help="protein FASTA (headers >org:gene)")
+    ap.add_argument("--library", type=Path, help="pressed HMM library for hmmscan")
+    ap.add_argument("--tblout", type=Path, help="cached hmmscan --tblout (skip hmmscan)")
+    ap.add_argument("--threads", type=int, default=4)
+    args = ap.parse_args(argv)
+
+    if args.tblout:
+        text = args.tblout.read_text()
+    elif args.library and args.proteome:
+        text = run_hmmscan(args.proteome, args.library, threads=args.threads)
+    else:
+        ap.error("give --tblout, or --library and --proteome")
+
+    org = args.org.lower()
+    hits = parse_hmmscan_tblout(text)
+    hits = hits[hits["gene"].str.startswith(f"{org}:")].reset_index(drop=True)
+    ko2rxn = load_ko2rxn(args.artefacts)
+    gt_pairs, gt_rxns = ground_truth(args.artefacts, org, ko2rxn)
+
+    print(f"\n{'='*70}\n{org}: {hits['gene'].nunique()} query genes with hits, "
+          f"{len(gt_pairs)} true gene->KO pairs, {len(gt_rxns)} true reactions\n{'='*70}")
+
+    best: dict[tuple, float] = {}
+    for ko, gene, e in zip(hits["ko"], hits["gene"], hits["evalue"], strict=True):
+        key = (gene.split(":", 1)[1], ko)
+        if key not in best or e < best[key]:
+            best[key] = e
+    matched = np.array([e for k, e in best.items() if k in gt_pairs])
+    novel = np.array([e for k, e in best.items() if k not in gt_pairs])
+
+    def logq(arr, q):
+        if not len(arr):
+            return float("nan")
+        v = np.quantile(arr, q)
+        return np.log10(v) if v > 0 else -300.0
+
+    print("\nlog10(E-value) percentiles  [matched=in annotation, novel=not]:")
+    print(f"  {'group':<8}{'n':>7}{'p50':>8}{'p90':>8}{'p95':>8}{'p99':>8}")
+    for name, arr in (("matched", matched), ("novel", novel)):
+        print(f"  {name:<8}{len(arr):>7}{logq(arr,.5):>8.0f}{logq(arr,.9):>8.0f}"
+              f"{logq(arr,.95):>8.0f}{logq(arr,.99):>8.0f}")
+
+    print("\ncutoff sweep (min_score_ratio_ko=0.3, min_score_ratio_g=0.8):")
+    print(f"  {'cutoff':>8}{'gKO_prec':>9}{'gKO_rec':>8}{'gKO_F1':>8}{'rxn_rec':>9}{'rxn_novel':>10}")
+    for cutoff in CUTOFFS:
+        pred = predicted_pairs(hits, cutoff=cutoff)
+        prec, rec, f1 = prf(pred, gt_pairs)
+        pred_rxns = {r for _, ko in pred for r in ko2rxn.get(ko, ())}
+        rrec = len(pred_rxns & gt_rxns) / len(gt_rxns) if gt_rxns else 0.0
+        print(f"  {cutoff:>8.0e}{prec:>9.2f}{rec:>8.2f}{f1:>8.2f}{rrec:>9.2f}"
+              f"{len(pred_rxns - gt_rxns):>10}")
+
+    print("\nratio sweep (cutoff=1e-50):")
+    print(f"  {'ko_ratio':>9}{'g_ratio':>8}{'gKO_prec':>9}{'gKO_rec':>8}{'gKO_F1':>8}")
+    for rko in KO_RATIOS:
+        for rg in G_RATIOS:
+            pred = predicted_pairs(hits, cutoff=1e-50,
+                                   min_score_ratio_ko=rko, min_score_ratio_g=rg)
+            prec, rec, f1 = prf(pred, gt_pairs)
+            print(f"  {rko:>9.1f}{rg:>8.2f}{prec:>9.2f}{rec:>8.2f}{f1:>8.2f}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/analyze_init_params.py b/scripts/analyze_init_params.py
new file mode 100644
index 0000000..1059006
--- /dev/null
+++ b/scripts/analyze_init_params.py
@@ -0,0 +1,367 @@
+#!/usr/bin/env python3
+"""Parameter calibration for (f)tINIT — intrinsic speed/quality sweeps (Phase 4d.7).
+
+Genome-scale benchmark that sweeps the MILP/conditioning parameters of raven_python's
+:func:`raven_python.init.run_ftinit`, :func:`raven_python.init.ftinit`, :func:`run_init`, and
+:func:`prep_init_model` and records, for each value, the *intrinsic* trade-off: wall-clock
+solve time, the MILP objective, and how far the result drifts from the tightest-setting
+("reference") run — both in objective (relative gap) and in the **kept-reaction set**
+(Jaccard). No external (RAVEN) reference is used: the question answered here is "what is
+the loosest / cheapest setting that still reproduces the tight-setting solution?".
+
+Why reaction-set drift matters: a MIP gap g only guarantees the *objective* is within g of
+optimal; the *which-reactions* answer can jump between alternate optima well before the
+objective moves. For a model-extraction tool the reaction set is the product, so we track
+its stability explicitly.
+
+Sweeps (select with ``--sweeps``; each is resumable — results are pickled per config and a
+re-run skips finished ones):
+
+* ``ftinit_milp``  — single staged-MILP step (step 0 of series '1+1') on the merged model.
+                     Cheap (~30-200 s each); the core sweep for ``mip_gap``/``big_m``/``force_on``.
+* ``prep_scale``   — rescaleModelForINIT on/off and its ``max_stoich_diff``, fed into the
+                     same step-0 MILP. Shows why scaling is needed for a fixed big-M.
+* ``tinit``        — full ``get_init_model`` (classic INIT). Sweeps ``mip_gap``/``eps``/
+                     ``prod_weight``/``big_m``. Expensive — uses a tight ``time_limit``.
+* ``ftinit_full``  — the whole ``ftinit`` pipeline (both steps + gap-fill). Sweeps
+                     ``mip_gap``/``big_m``. Expensive (~200 s+/config).
+
+Usage
+-----
+    python scripts/analyze_init_params.py \
+        --work ~/hgem_compare --cell HCT116 --sweeps ftinit_milp,prep_scale
+
+``--work`` holds ``raven_refModel.xml``; ``--human-gem`` is the Human-GEM checkout the spont flags
+and expression data are read from (custom ids are hard-coded; see the Human-GEM validation run).
+Requires a MILP solver (Gurobi/HiGHS) on the cobra config. Produces a results pickle and prints
+a table per sweep; feed the tables into docs/init_param_calibration.md.
+"""
+from __future__ import annotations
+
+import argparse
+import pickle
+import time
+from dataclasses import dataclass, field
+from pathlib import Path
+
+import cobra
+
+from raven_python.init import (
+    ftinit,
+    gene_scores_from_expression,
+    get_init_model,
+    prep_init_model,
+    score_reactions_from_genes,
+)
+from raven_python.init.ftinit import run_ftinit
+from raven_python.init.merge import group_rxn_scores
+from raven_python.init.prep import rescale_for_init
+from raven_python.init.steps import get_init_steps
+
+# Sweep grids (first value of each tolerance sweep is the tight "reference").
+MIP_GAPS = (0.0002, 0.001, 0.003, 0.01, 0.03, 0.1)
+BIG_MS = (100.0, 50.0, 25.0, 250.0, 1000.0)
+FORCE_ONS = (0.1, 0.02, 0.05, 0.2, 0.5)
+MAX_STOICH = (25.0, 10.0, 50.0, 100.0)
+EPS_VALS = (1.0, 0.1, 0.5, 2.0)
+PROD_WEIGHTS = (0.5, 0.0, 0.25, 1.0)
+
+# "Recommended = cheapest config within these of the reference" thresholds.
+TOL_OBJ = 0.005   # relative objective gap
+TOL_JAC = 0.99    # kept-reaction-set Jaccard
+
+
+@dataclass
+class Result:
+    """One config's outcome (reaction set stored sorted for pickling/Jaccard)."""
+
+    label: str  # config name shown in the tables, e.g. "gap=0.001"
+    seconds: float  # wall-clock duration of the run, from time.time() differences
+    status: str  # "ok", or "FAIL:<ExceptionName>" when the run raised
+    objective: float  # solver objective; the tINIT and full-ftINIT sweeps record 0.0 here
+    n_kept: int  # number of reactions kept by the run (0 for a failed run)
+    reactions: list[str] = field(default_factory=list)  # sorted ids of the kept reactions
+    rel_obj_gap: float | None = None  # vs the sweep reference
+    jaccard: float | None = None      # vs the sweep reference
+
+
+def _jaccard(a: set[str], b: set[str]) -> float:
+    return len(a & b) / len(a | b) if (a or b) else 1.0
+
+
+def _load_inputs(work: Path, human_gem: Path, cell: str):  # -> (ref, spont, custom, gene_scores, rxn_scores)
+    ref = cobra.io.read_sbml_model(str(work / "raven_refModel.xml"))
+    ref.solver = cobra.Configuration().solver  # solver taken from cobra's Configuration
+    spont = []  # first-column ids of rows whose "spontaneous" column is "1"
+    with open(human_gem / "model" / "reactions.tsv") as f:
+        hdr = f.readline().rstrip("\n").split("\t")
+        ci = hdr.index("spontaneous")  # raises ValueError if the column is missing
+        for line in f:
+            p = line.rstrip("\n").split("\t")
+            if p[ci] == "1":
+                spont.append(p[0])
+    protein = [f"MAR0{n}" for n in (5155, 5156, 5161, 5167, 5168, 5169, 5170, 5171, 5172,
+               5174, 5260, 5262, 5264, 5266, 5267, 5268, 5269, 5270, 5271, 5273, 5275, 5277,
+               5279, 5281, 5283, 5291)] + ["MAR09817", "MAR09818"]  # "MAR0<n>" ids plus two literal ids
+    pool = ["MAR00011", "MAR00012", "MAR00477", "MAR05233", "MAR05234", "MAR05238",
+            "MAR05239", "MAR05243", "MAR05244", "MAR05247", "MAR09022", "MAR00015",
+            "MAR00016", "MAR00017", "MAR10033", "MAR10035", "MAR10036", "MAR10037",
+            "MAR10038", "MAR10062", "MAR10063", "MAR10064", "MAR10065", "MAR13082"]
+    custom = sorted(set(protein) | set(pool))  # de-duplicated union of the two hard-coded lists
+    expr: dict[str, float] = {}  # first column -> value in the `cell` column
+    with open(human_gem / "data" / "datasets" / "Hart2015_RNAseq.txt") as f:
+        h = f.readline().rstrip("\n").split("\t")
+        c = h.index(cell)  # raises ValueError if `cell` is not a header column
+        for line in f:
+            p = line.rstrip("\n").split("\t")
+            expr[p[0]] = float(p[c])
+    gene_scores = gene_scores_from_expression(expr, 1.0)  # NOTE(review): meaning of 1.0 not visible here — confirm
+    rxn_scores = score_reactions_from_genes(ref, gene_scores)
+    return ref, spont, custom, gene_scores, rxn_scores
+
+
+def _step0(prep, rxn_scores):
+    """The scores/flags for step 0 of series '1+1' (the cheap single-MILP probe)."""
+    step = get_init_steps("1+1")[0]
+    to_zero = prep.masks.ignored(step.ignore_mask)
+    scores = group_rxn_scores(prep.min_model, rxn_scores, prep.orig_rxn_ids,
+                              prep.group_ids, to_zero)
+    return step, scores
+
+
+def _run_step0(min_model, scores, prep, step, **kw) -> Result:
+    t = time.time()
+    res = run_ftinit(min_model, scores, essential_rxns=set(prep.essential_rxns),
+                     allow_excretion=step.allow_met_secr, rem_pos_rev=step.pos_rev_off,
+                     ignore_mets=step.mets_to_ignore, **kw)
+    return Result(label="", seconds=time.time() - t, status="ok",
+                  objective=res.objective, n_kept=len(res.on_reactions),
+                  reactions=sorted(res.on_reactions))
+
+
+def _finalize(results: list[Result]) -> None:
+    """Fill rel_obj_gap / jaccard against the first result (the reference)."""
+    ref = results[0]
+    ref_set = set(ref.reactions)
+    for r in results:
+        r.rel_obj_gap = (ref.objective - r.objective) / abs(ref.objective) if ref.objective else 0.0
+        r.jaccard = _jaccard(set(r.reactions), ref_set)
+
+
+def _recommend(results: list[Result]) -> str:
+    """Cheapest config (after the reference) within both tolerances; '-' if none."""
+    ok = [r for r in results[1:]
+          if r.status == "ok" and abs(r.rel_obj_gap or 1) <= TOL_OBJ and (r.jaccard or 0) >= TOL_JAC]
+    return min(ok, key=lambda r: r.seconds).label if ok else "-"
+
+
+def _print_table(title: str, results: list[Result], note: str = "") -> list[str]:
+    lines = [f"### {title}", ""]
+    if note:
+        lines += [note, ""]
+    lines.append("| config | time (s) | status | objective | n_kept | rel.obj.gap | Jaccard vs ref |")
+    lines.append("|---|--:|---|--:|--:|--:|--:|")
+    for r in results:
+        gap = "ref" if r is results[0] else (f"{r.rel_obj_gap:+.4f}" if r.rel_obj_gap is not None else "")
+        jac = "ref" if r is results[0] else (f"{r.jaccard:.4f}" if r.jaccard is not None else "")
+        lines.append(f"| {r.label} | {r.seconds:.0f} | {r.status} | {r.objective:.1f} | "
+                     f"{r.n_kept} | {gap} | {jac} |")
+    rec = _recommend(results)
+    lines += ["", f"Cheapest config within obj≤{TOL_OBJ:.1%} & Jaccard≥{TOL_JAC} of ref: **{rec}**", ""]
+    for ln in lines:
+        print(ln)
+    return lines
+
+
+# --------------------------------------------------------------------------- sweeps
+
+def sweep_ftinit_milp(prep, rxn_scores, store, save) -> list:  # returns the markdown lines of its three tables
+    step, scores = _step0(prep, rxn_scores)
+    mm = prep.min_model
+    doc: list[str] = []
+
+    def cfg(label, **kw):  # run one config unless `store` already holds it; return the stored Result
+        key = ("ftinit_milp", label)  # cache key is the label only, not the keyword values
+        if key not in store:
+            print(f"[ftinit_milp] {label} ...", flush=True)
+            r = _run_step0(mm, scores, prep, step, **kw)  # NOTE(review): no try/except here, unlike the other sweeps
+            r.label = label
+            store[key] = r
+            save()  # persist after each finished config so a re-run can skip it
+        return store[key]
+
+    # mip_gap sweep (big_m=100, force_on=0.1)
+    res = [cfg(f"gap={g}", mip_gap=g, big_m=100.0, force_on=0.1, time_limit=900) for g in MIP_GAPS]
+    _finalize(res)  # first entry of each list is the reference row
+    doc += _print_table("ftINIT step-0: mip_gap (big_m=100, force_on=0.1)", res)
+
+    # big_m sweep (gap=0.001, force_on=0.1)
+    res = [cfg(f"big_m={int(b)}", mip_gap=0.001, big_m=b, force_on=0.1, time_limit=900) for b in BIG_MS]
+    _finalize(res)
+    doc += _print_table("ftINIT step-0: big_m (gap=0.001, force_on=0.1)", res,
+                        "big_m caps a scored reaction's flux; large values weaken the LP relaxation.")
+
+    # force_on sweep (gap=0.001, big_m=100) — changes the model (connectivity threshold)
+    res = [cfg(f"force_on={fo}", mip_gap=0.001, big_m=100.0, force_on=fo, time_limit=900) for fo in FORCE_ONS]
+    _finalize(res)
+    doc += _print_table("ftINIT step-0: force_on (gap=0.001, big_m=100)", res,
+                        "force_on changes the *model* (min flux to count as 'on'), not just tolerance — "
+                        "Jaccard here measures sensitivity, not error.")
+    return doc
+
+
+def sweep_prep_scale(ref, spont, custom, rxn_scores, store, save) -> list:  # returns the table's markdown lines
+    doc: list[str] = []
+    # One unscaled prep; rescale copies of its min_model for each setting.
+    base = prep_init_model(ref, ext_comp="e", spontaneous=spont, custom=custom, scale=False)
+    step, scores = _step0(base, rxn_scores)
+
+    def cfg(label, msd):  # msd=None leaves the copied model unscaled
+        key = ("prep_scale", label)
+        if key not in store:
+            print(f"[prep_scale] {label} ...", flush=True)
+            mm = base.min_model.copy()  # rescale a copy so `base` stays unscaled for the next config
+            if msd is not None:
+                rescale_for_init(mm, msd)
+            # group_rxn_scores keys are merged ids — identical across copies, so reuse `scores`.
+            t = time.time()
+            try:
+                r = _run_step0(mm, scores, base, step, mip_gap=0.001, big_m=100.0,
+                               force_on=0.1, time_limit=600)
+            except Exception as ex:  # noqa: BLE001  (infeasible/intractable is a finding)
+                r = Result(label=label, seconds=time.time() - t, status=f"FAIL:{type(ex).__name__}",
+                           objective=0.0, n_kept=0)
+            r.label = label  # _run_step0 returns an empty label
+            store[key] = r
+            save()  # persist after each finished config so a re-run can skip it
+        return store[key]
+
+    res = [cfg("scale=on,msd=25", 25.0)]  # reference = production default
+    res += [cfg(f"msd={int(m)}", m) for m in MAX_STOICH if m != 25.0]  # 25 is already the reference row
+    res.append(cfg("scale=off", None))
+    _finalize(res)
+    doc += _print_table("prep scaling: rescaleModelForINIT max_stoich_diff (+scale off), big_m=100", res,
+                        "With big_m=100 fixed, scale=off / poor conditioning is expected to be "
+                        "infeasible or far slower — that is the reason scaling is on by default.")
+    return doc
+
+
+def sweep_tinit(ref, rxn_scores, store, save) -> list:  # returns the markdown lines of its four tables
+    doc: list[str] = []
+    ess: list[str] = []  # no essential reactions are passed to get_init_model
+
+    def cfg(label, **kw):  # run one config unless `store` already holds it; failures are stored too
+        key = ("tinit", label)
+        if key not in store:
+            print(f"[tinit] {label} ...", flush=True)
+            t = time.time()
+            try:
+                out = get_init_model(ref, rxn_scores=rxn_scores, essential_rxns=ess, **kw)
+                r = Result(label=label, seconds=time.time() - t, status="ok",
+                           objective=0.0, n_kept=len(out.model.reactions),  # objective is not recorded here
+                           reactions=sorted(x.id for x in out.model.reactions))
+            except Exception as ex:  # noqa: BLE001
+                r = Result(label=label, seconds=time.time() - t, status=f"FAIL:{type(ex).__name__}",
+                           objective=0.0, n_kept=0)
+            store[key] = r
+            save()  # persist after each finished config so a re-run can skip it
+        return store[key]
+
+    tl = 400  # tight time limit so the sweep is affordable
+    res = [cfg(f"gap={g}", eps=1.0, prod_weight=0.5, mip_gap=g, time_limit=tl) for g in (0.001, 0.003, 0.01)]
+    _finalize(res)  # objective is 0.0 on every row, so rel_obj_gap comes out as 0.0 throughout
+    doc += _print_table(f"tINIT: mip_gap (eps=1, prod_weight=0.5, time_limit={tl}s)", res)
+
+    res = [cfg(f"eps={e}", eps=e, prod_weight=0.5, mip_gap=0.005, time_limit=tl) for e in EPS_VALS]
+    _finalize(res)
+    doc += _print_table("tINIT: eps (gap=0.005) — connectivity flux threshold (changes the model)", res)
+
+    res = [cfg(f"prodw={p}", eps=1.0, prod_weight=p, mip_gap=0.005, time_limit=tl) for p in PROD_WEIGHTS]
+    _finalize(res)
+    doc += _print_table("tINIT: prod_weight (gap=0.005) — metabolite-production reward (changes the model)", res)
+
+    res = [cfg("big_m=ub(None)", eps=1.0, prod_weight=0.5, mip_gap=0.005, time_limit=tl, big_m=None)]
+    res += [cfg(f"big_m={int(b)}", eps=1.0, prod_weight=0.5, mip_gap=0.005, time_limit=tl, big_m=b)
+            for b in (1000.0, 250.0, 100.0)]
+    _finalize(res)
+    doc += _print_table("tINIT: big_m (gap=0.005) — None=per-reaction ub (no rescale on tINIT)", res)
+    return doc
+
+
+def sweep_ftinit_full(prep, rxn_scores, gene_scores, store, save) -> list:  # returns the table's markdown lines
+    doc: list[str] = []
+
+    def cfg(label, **kw):  # run one config unless `store` already holds it; failures are stored too
+        key = ("ftinit_full", label)
+        if key not in store:
+            print(f"[ftinit_full] {label} ...", flush=True)
+            t = time.time()
+            try:
+                out = ftinit(prep, rxn_scores, gene_scores=gene_scores, series="1+1", **kw)
+                r = Result(label=label, seconds=time.time() - t, status="ok",
+                           objective=0.0, n_kept=len(out.reactions),  # objective is not recorded here
+                           reactions=sorted(x.id for x in out.reactions))
+            except Exception as ex:  # noqa: BLE001
+                r = Result(label=label, seconds=time.time() - t, status=f"FAIL:{type(ex).__name__}",
+                           objective=0.0, n_kept=0)
+            store[key] = r
+            save()  # persist after each finished config so a re-run can skip it
+        return store[key]
+
+    res = [cfg(f"gap={g}", mip_gap=g, time_limit=600) for g in (0.001, 0.003, 0.01)]  # gap=0.001 is the reference
+    res += [cfg(f"big_m={int(b)}", mip_gap=0.003, big_m=b, time_limit=600) for b in (50.0, 250.0)]
+    _finalize(res)  # both sweeps share one table, all measured against the first row
+    doc += _print_table("ftINIT full pipeline ('1+1'): mip_gap & big_m — final model size/stability", res)
+    return doc
+
+
+def main() -> None:
+    ap = argparse.ArgumentParser(description=__doc__,
+                                 formatter_class=argparse.RawDescriptionHelpFormatter)
+    ap.add_argument("--work", type=Path, default=Path.home() / "hgem_compare")
+    ap.add_argument("--human-gem", type=Path, default=Path.home() / "github" / "Human-GEM")
+    ap.add_argument("--cell", default="HCT116")
+    ap.add_argument("--sweeps", default="ftinit_milp,prep_scale,tinit,ftinit_full",
+                    help="comma-separated subset of: ftinit_milp,prep_scale,tinit,ftinit_full")
+    ap.add_argument("--out", type=Path, default=None, help="results pickle (resumable)")
+    ap.add_argument("--doc", type=Path, default=None, help="write the markdown tables here")
+    args = ap.parse_args()
+
+    out = args.out or args.work / f"init_param_sweep_{args.cell}.pkl"
+    store: dict = pickle.load(open(out, "rb")) if out.exists() else {}
+
+    def save():
+        tmp = Path(f"{out}.part")
+        pickle.dump(store, open(tmp, "wb"))
+        tmp.replace(out)
+
+    sweeps = set(args.sweeps.split(","))
+    t0 = time.time()
+    ref, spont, custom, gene_scores, rxn_scores = _load_inputs(args.work, args.human_gem, args.cell)
+    print(f"[{time.time()-t0:.0f}s] loaded {len(ref.reactions)} rxns, cell={args.cell}", flush=True)
+
+    prep = None
+    if sweeps & {"ftinit_milp", "ftinit_full"}:
+        prep = prep_init_model(ref, ext_comp="e", spontaneous=spont, custom=custom, scale=True)
+        print(f"[{time.time()-t0:.0f}s] scaled prep: min_model {len(prep.min_model.reactions)} rxns",
+              flush=True)
+
+    doc: list[str] = [f"# (f)tINIT parameter calibration — Human-GEM / {args.cell}", "",
+                      "Generated by `scripts/analyze_init_params.py`. Reference (first) row of each "
+                      "tolerance sweep is the tightest setting; gaps/Jaccard are measured against it.", ""]
+    if "ftinit_milp" in sweeps:
+        doc += sweep_ftinit_milp(prep, rxn_scores, store, save)
+    if "prep_scale" in sweeps:
+        doc += sweep_prep_scale(ref, spont, custom, rxn_scores, store, save)
+    if "tinit" in sweeps:
+        doc += sweep_tinit(ref, rxn_scores, store, save)
+    if "ftinit_full" in sweeps:
+        doc += sweep_ftinit_full(prep, rxn_scores, gene_scores, store, save)
+
+    if args.doc:
+        args.doc.write_text("\n".join(doc) + "\n")
+        print(f"\nwrote {args.doc}", flush=True)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/analyze_init_robustness.py b/scripts/analyze_init_robustness.py
new file mode 100644
index 0000000..1cac79a
--- /dev/null
+++ b/scripts/analyze_init_robustness.py
@@ -0,0 +1,268 @@
+#!/usr/bin/env python3
+"""Robustness of (f)tINIT to degraded transcriptomics input (Phase 4d.7).
+
+The metabolic-task layer is *always part of the pipeline* — it is what makes the output a
+functional model. The experimental variable here is therefore the **transcriptomics
+input**, not whether tasks are used. This script holds the task + gap-fill layer fixed and
+asks: as the expression data gets noisier or sparser, (a) does the model stay functional,
+and (b) how much does the *reaction content* drift from what clean data would give — and
+which parameters keep it stable?
+
+Metrics, per run (tasks always on):
+
+* ``frac``    — fraction of essential metabolic tasks the model performs (``check_tasks``).
+                The task+gap-fill layer should hold this at 1.0; a drop is a real failure.
+* ``Jaccard`` — reaction-set overlap with the **clean-data** model. This is the real cost
+                of bad input: even when all tasks still pass, degraded data changes *which*
+                reactions are kept. The primary robustness signal.
+* ``n_rxns``  — model size (does degraded data bloat or shrink it).
+
+Three independent degradations of the gene-expression vector (severity = higher is worse):
+
+* ``dropout``    — set a random fraction of genes to 0 (→ gene score -5, a strong *remove*
+                   signal). Simulates shallow sequencing / single-cell dropout.
+* ``noise``      — multiply each level by ``exp(N(0, sigma))`` (sigma = severity).
+* ``downsample`` — drop a random fraction of genes entirely (→ ``no_gene_score``).
+
+Two phases:
+
+* **gradient** — task pipeline across degradation levels; shows functional integrity and
+  reaction-set drift vs the clean-data model.
+* **levers**   — at a fixed severe degradation, vary the robustness parameters
+  (``no_gene_score``, ``force_on``; ``prod_weight``/``eps`` for tINIT) to see which keeps
+  the model closest to the clean-data result / most functional.
+
+``--algo ftinit`` (default) or ``tinit``. Resumable; reuses the cached Human-GEM task prep
+(``rg_prep_tasks.pkl``). Loose MIP gap for speed (functionality + set overlap, not the
+exact optimum, are the metrics).
+
+Usage
+-----
+    python scripts/analyze_init_robustness.py --algo ftinit --cell HCT116
+"""
+from __future__ import annotations
+
+import argparse
+import pickle
+import time
+from dataclasses import dataclass, field
+from pathlib import Path
+
+import cobra
+import numpy as np
+
+from raven_python.init import (
+    ftinit,
+    gene_scores_from_expression,
+    get_init_model,
+    score_reactions_from_genes,
+)
+from raven_python.tasks import check_tasks, parse_task_list
+
+# Degradation grid (severity per kind): a moderate and a severe-but-realistic point per kind.
+GRADIENT = {
+    "dropout": (0.5, 0.7),     # single-cell dropout is ~50-70%. 90%+ dropout is left out: it
+    "noise": (1.0, 2.0),       #   breaks ~all tasks, so gap-fill rebuilds the model with a
+    "downsample": (0.5, 0.7),  #   MILP per task, which is pathologically slow and unrealistic.
+}
+LEVER_KIND, LEVER_LEVEL = "dropout", 0.7      # severe-but-tractable point for the levers
+NO_GENE_SCORES = (-1.0, -0.5)                 # vs the default -2 (the gradient row)
+FORCE_ONS = (0.2,)                            # vs the default 0.1
+PROD_WEIGHTS = (0.0, 1.0, 2.0)                # tINIT only (default 0.5)
+EPS_VALS = (0.5, 1.0)                         # tINIT only (gradient default 0.1; test higher)
+
+# Loose solver tolerances (speed; functionality + set overlap, not the exact optimum).
+MIP_GAP, TIME_LIMIT = 0.02, 120.0
+
+
+@dataclass
+class Result:  # one table row: the outcome of building and task-checking a single model
+    label: str  # row label, e.g. "clean" or "dropout=0.7"
+    seconds: float  # wall time of the model build plus the task check
+    status: str  # "ok", or "FAIL:<message>" when the build or check raised
+    n_rxns: int  # reactions in the extracted model (0 on failure)
+    n_pass: int  # tasks the model passed
+    n_tasks: int  # tasks checked
+    frac_pass: float  # n_pass / n_tasks (0.0 when there are no tasks, or on failure)
+    reactions: list[str] = field(default_factory=list)  # sorted reaction ids of the model
+    jaccard_clean: float | None = None  # overlap with the clean-data model; None if not computed
+
+
+def _jaccard(a: set[str], b: set[str]) -> float:
+    return 1.0 if not (a or b) else len(a & b) / len(a | b)  # two empty sets count as identical
+
+
+def degrade(expr: dict[str, float], kind: str, level: float, seed: int) -> dict[str, float]:
+    """Return a degraded copy of ``expr``: ``kind`` picks the scheme, ``level`` its severity.
+    ``level <= 0`` gives an unmodified copy before ``kind`` is checked; otherwise an unknown
+    ``kind`` raises ``ValueError``. All randomness comes from ``default_rng(seed)``."""
+    if level <= 0:
+        return dict(expr)
+    rng = np.random.default_rng(seed)
+    genes = list(expr)
+    if kind == "dropout":  # zero out a random ``level`` fraction of the genes
+        zeroed = rng.choice(genes, size=int(level * len(genes)), replace=False)
+        return {**expr, **dict.fromkeys(zeroed, 0.0)}
+    if kind == "noise":  # multiply each level by exp(N(0, level)), one draw per gene in order
+        return {g: max(v * float(np.exp(rng.normal(0.0, level))), 0.0) for g, v in expr.items()}
+    if kind == "downsample":  # keep only a random ``1 - level`` fraction of the genes
+        kept = set(rng.choice(genes, size=int((1 - level) * len(genes)), replace=False))
+        return {g: v for g, v in expr.items() if g in kept}
+    raise ValueError(f"unknown degradation kind {kind!r}")
+
+
+def functionality(model: cobra.Model, tasks) -> tuple[int, int]:
+    """Run ``check_tasks`` on ``model`` and return ``(passed, total)`` over its results."""
+    outcomes = check_tasks(model, tasks)
+    return sum(outcome.passed for outcome in outcomes), len(outcomes)
+
+
+def _measure(label, builder, tasks, clean_set=None) -> Result:
+    """Build one model with ``builder()`` and score it; any exception becomes a ``FAIL`` row."""
+    started = time.time()
+    try:
+        model = builder()
+        n_pass, n_tasks = functionality(model, tasks)
+        rxn_ids = sorted(rxn.id for rxn in model.reactions)
+        elapsed = time.time() - started  # taken before the Jaccard, which is not timed
+        frac = n_pass / n_tasks if n_tasks else 0.0
+        jac = None if clean_set is None else _jaccard(set(rxn_ids), clean_set)
+        return Result(label, elapsed, "ok", len(rxn_ids), n_pass, n_tasks, frac, rxn_ids, jac)
+    except Exception as ex:  # noqa: BLE001  (infeasible/failed build is itself a finding)
+        msg = str(ex)[:80].replace("\n", " ") or type(ex).__name__
+        print(f"  FAIL {label}: {type(ex).__name__}: {ex}", flush=True)
+        return Result(label, time.time() - started, f"FAIL:{msg}", 0, 0, len(tasks), 0.0)
+
+
+def _table(title, results, note="") -> list[str]:
+    """Render ``results`` as a markdown table under ``title``; print it and return its lines.
+    A row without a Jaccard reads ``ref`` if it succeeded (the clean reference), else ``n/a``."""
+    lines = [f"### {title}", ""] + ([note, ""] if note else [])
+    lines += ["| config | time (s) | status | n_rxns | tasks passed | frac | Jaccard vs clean |",
+              "|---|--:|---|--:|--:|--:|--:|"]
+    for r in results:
+        missing = "ref" if r.status == "ok" else "n/a"  # a failed row is not the reference
+        jac = missing if r.jaccard_clean is None else f"{r.jaccard_clean:.3f}"
+        lines.append(f"| {r.label} | {r.seconds:.0f} | {r.status} | {r.n_rxns} | "
+                     f"{r.n_pass}/{r.n_tasks} | {r.frac_pass:.3f} | {jac} |")
+    lines.append("")
+    print("\n".join(lines))
+    return lines
+
+
+def main() -> None:
+    """CLI entry point: run the requested phases, caching each measured row, and emit the tables."""
+    ap = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+    ap.add_argument("--work", type=Path, default=Path.home() / "hgem_compare")
+    ap.add_argument("--human-gem", type=Path, default=Path.home() / "github" / "Human-GEM")
+    ap.add_argument("--cell", default="HCT116")
+    ap.add_argument("--algo", choices=("ftinit", "tinit"), default="ftinit")
+    ap.add_argument("--phase", default="gradient,levers")
+    ap.add_argument("--seed", type=int, default=0)
+    ap.add_argument("--out", type=Path, default=None)
+    ap.add_argument("--doc", type=Path, default=None)
+    args = ap.parse_args()
+
+    out = args.out or args.work / f"init_robustness_{args.algo}_{args.cell}.pkl"
+    store: dict = pickle.loads(out.read_bytes()) if out.exists() else {}  # self-written cache only
+
+    def save():
+        tmp = Path(f"{out}.part")
+        tmp.write_bytes(pickle.dumps(store))  # file is closed before the rename below
+        tmp.replace(out)
+
+    def cached(key, fn):
+        if key not in store:
+            print(f"[{args.algo}] {key[1]} ...", flush=True)
+            store[key] = fn()
+            save()
+        return store[key]
+
+    t0 = time.time()
+    ref = cobra.io.read_sbml_model(str(args.work / "raven_refModel.xml"))
+    ref.solver = cobra.Configuration().solver
+    expr: dict[str, float] = {}
+    with open(args.human_gem / "data" / "datasets" / "Hart2015_RNAseq.txt") as f:
+        h = f.readline().rstrip("\n").split("\t")
+        c = h.index(args.cell)
+        for line in f:
+            p = line.rstrip("\n").split("\t")
+            expr[p[0]] = float(p[c])
+    tasks = parse_task_list(str(args.human_gem / "data" / "metabolicTasks" /
+                                "metabolicTasks_Essential.txt"))
+    prep = pickle.loads((args.work / "rg_prep_tasks.pkl").read_bytes())  # ftINIT uses task layer
+    task_layer_note = ("task layer always on" if args.algo == "ftinit"
+                       else "essential_rxns=[] (tINIT lb=eps incompatible with many essentials)")
+    print(f"[{time.time()-t0:.0f}s] ref {len(ref.reactions)} rxns, {len(tasks)} tasks, "
+          f"cell={args.cell}, algo={args.algo} ({task_layer_note})", flush=True)
+
+    def model_for(e, **kw):
+        g = gene_scores_from_expression(e, 1.0)
+        r = score_reactions_from_genes(ref, g, no_gene_score=kw.get("no_gene_score", -2.0))
+        if args.algo == "ftinit":
+            return ftinit(prep, r, gene_scores=g, series="1+1",
+                          force_on=kw.get("force_on", 0.1), mip_gap=MIP_GAP, time_limit=TIME_LIMIT)
+        # tINIT's essential_rxns are forced via lb=eps; >100 essentials simultaneously is
+        # infeasible at genome scale regardless of eps (see docs/init_param_calibration.md
+        # §1.5). tINIT is therefore run *without* essentials here — the realistic
+        # tINIT-without-gap-fill picture. Use a small default eps (0.1) all the same to
+        # avoid the unrelated connectivity-threshold over-constraint.
+        return get_init_model(ref, rxn_scores=r, essential_rxns=[],
+                              prod_weight=kw.get("prod_weight", 0.5), eps=kw.get("eps", 0.1),
+                              mip_gap=MIP_GAP, time_limit=TIME_LIMIT).model
+
+    phases = set(args.phase.split(","))
+    doc = [f"# (f)tINIT robustness to degraded transcriptomics — Human-GEM / {args.cell} / {args.algo}",
+           "", "Task + gap-fill layer is always on (it is part of the pipeline); the variable is the "
+           "expression input. Functional = fraction of essential tasks performed (check_tasks); "
+           "Jaccard is reaction-set overlap with the clean-data model. Generated by "
+           "`scripts/analyze_init_robustness.py`.", ""]
+
+    clean = cached(("clean", "clean"), lambda: _measure("clean", lambda: model_for(expr), tasks))
+    clean_set = set(clean.reactions)
+    clean.jaccard_clean = None  # it is the reference
+    doc += _table("Clean-data baseline", [clean])
+
+    if "gradient" in phases:
+        for kind, levels in GRADIENT.items():
+            rows = [clean]
+            for lvl in levels:
+                e = degrade(expr, kind, lvl, args.seed)
+                rows.append(cached((f"grad_{kind}", f"{kind}={lvl}"), lambda e=e, lvl=lvl, kind=kind:
+                            _measure(f"{kind}={lvl}", lambda: model_for(e), tasks, clean_set)))
+            doc += _table(f"Gradient: {kind} (task pipeline always on)", rows,
+                          "Higher severity = noisier/sparser input. frac should stay ~1.0 (the task "
+                          "layer's job); the Jaccard drop is how much degraded data changes the model.")
+
+    if "levers" in phases:
+        e = degrade(expr, LEVER_KIND, LEVER_LEVEL, args.seed)
+        tag = f"{LEVER_KIND}={LEVER_LEVEL}"
+        rows = []
+        if args.algo == "ftinit":
+            for ngs in NO_GENE_SCORES:
+                rows.append(cached(("lever", f"no_gene_score={ngs}"), lambda ngs=ngs:
+                            _measure(f"no_gene_score={ngs}", lambda: model_for(e, no_gene_score=ngs),
+                                     tasks, clean_set)))
+            for fo in FORCE_ONS:
+                rows.append(cached(("lever", f"force_on={fo}"), lambda fo=fo:
+                            _measure(f"force_on={fo}", lambda: model_for(e, force_on=fo),
+                                     tasks, clean_set)))
+        else:
+            for pw in PROD_WEIGHTS:
+                rows.append(cached(("lever", f"prod_weight={pw}"), lambda pw=pw:
+                            _measure(f"prod_weight={pw}", lambda: model_for(e, prod_weight=pw),
+                                     tasks, clean_set)))
+            for ev in EPS_VALS:
+                rows.append(cached(("lever", f"eps={ev}"), lambda ev=ev:
+                            _measure(f"eps={ev}", lambda: model_for(e, eps=ev), tasks, clean_set)))
+        doc += _table(f"Levers at {tag}: which parameter keeps the model closest to clean?", rows,
+                      "Compare against the default-parameter row for this severity in the gradient "
+                      "table above (no_gene_score=-2, force_on=0.1 / prod_weight=0.5, eps=0.1).")
+
+    if args.doc:
+        args.doc.write_text("\n".join(doc) + "\n")
+        print(f"\nwrote {args.doc}", flush=True)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/analyze_init_solvers.py b/scripts/analyze_init_solvers.py
new file mode 100644
index 0000000..e1e7b6c
--- /dev/null
+++ b/scripts/analyze_init_solvers.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python3
+"""Cross-solver benchmark for ftINIT on a genome-scale model (Phase 4d.7).
+
+The clean-data calibration and robustness studies tuned (and ran) on Gurobi. The CI
+``tests/test_init_solvers.py`` checks correctness on toy models for every installed MILP
+solver; this script measures **genome-scale tractability and reaction-set agreement** —
+does the same ftINIT pipeline that works in seconds on Gurobi also complete on HiGHS or
+GLPK, in what time, and producing the same model?
+
+For each installed MILP-capable optlang interface (Gurobi, ``hybrid`` for HiGHS, GLPK) it
+runs the *same* ftINIT call (cached Human-GEM no-task prep + HCT116 scores) with the same
+``mip_gap``/``time_limit``, records (status, wall time, reaction set), and computes the
+pairwise Jaccard of the resulting reaction sets. Solvers that fail (the optlang
+``hybrid_interface`` ``clone`` bug, or GLPK timing out at genome scale) are recorded as
+such — that *is* the cross-solver picture.
+
+Usage
+-----
+    python scripts/analyze_init_solvers.py --cell HCT116 --time-limit 900 \\
+        --doc docs/init_solver_benchmark.md
+"""
+from __future__ import annotations
+
+import argparse
+import importlib.util
+import pickle
+import time
+from dataclasses import dataclass, field
+from pathlib import Path
+
+import cobra
+
+from raven_python.init import ftinit, gene_scores_from_expression, score_reactions_from_genes
+
+_INTERFACES = {"gurobi": "gurobi_interface", "hybrid": "hybrid_interface", "glpk": "glpk_interface"}
+
+
+def _available_solvers() -> list[str]:  # find_spec only locates a module; it does not import it
+    located = {name: importlib.util.find_spec(f"optlang.{mod}") for name, mod in _INTERFACES.items()}
+    return [name for name, spec in located.items() if spec is not None]
+
+
+@dataclass
+class Result:  # outcome of one solver's ftINIT run
+    solver: str  # solver name as set on the models, e.g. "gurobi"
+    seconds: float  # wall time of the run, including the model and prep load
+    status: str  # "ok", or "FAIL:<exception type>: <message>"
+    n_rxns: int  # reactions in the extracted model (0 on failure)
+    reactions: list[str] = field(default_factory=list)  # sorted reaction ids (empty on failure)
+
+
+def _jaccard(a: set[str], b: set[str]) -> float:
+    return 1.0 if not (a or b) else len(a & b) / len(a | b)  # two empty sets count as identical
+
+
+def main() -> None:
+    """CLI entry point: run ftINIT once per located solver (cached), then report time and agreement."""
+    ap = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+    ap.add_argument("--work", type=Path, default=Path.home() / "hgem_compare")
+    ap.add_argument("--human-gem", type=Path, default=Path.home() / "github" / "Human-GEM")
+    ap.add_argument("--cell", default="HCT116")
+    ap.add_argument("--mip-gap", type=float, default=0.001)
+    ap.add_argument("--time-limit", type=float, default=900.0)
+    ap.add_argument("--out", type=Path, default=None)
+    ap.add_argument("--doc", type=Path, default=None)
+    args = ap.parse_args()
+
+    out = args.out or args.work / f"init_solver_bench_{args.cell}.pkl"
+    store: dict = pickle.loads(out.read_bytes()) if out.exists() else {}  # self-written cache only
+
+    def save():
+        tmp = Path(f"{out}.part")
+        tmp.write_bytes(pickle.dumps(store))  # file is closed before the rename below
+        tmp.replace(out)
+
+    expr: dict[str, float] = {}
+    with open(args.human_gem / "data" / "datasets" / "Hart2015_RNAseq.txt") as f:
+        h = f.readline().rstrip("\n").split("\t")
+        c = h.index(args.cell)
+        for line in f:
+            p = line.rstrip("\n").split("\t")
+            expr[p[0]] = float(p[c])
+    if not (args.work / "rg_prep.pkl").exists():
+        raise SystemExit(f"missing prep at {args.work / 'rg_prep.pkl'} — run the validation first")
+
+    solvers = _available_solvers()
+    print(f"available MILP solvers: {solvers}", flush=True)
+
+    def run(solver: str) -> Result:
+        if solver in store:
+            print(f"[{solver}] cached, skip", flush=True)
+            return store[solver]
+        print(f"[{solver}] running ...", flush=True)
+        t = time.time()
+        try:
+            # Fresh ref + prep load per solver so a broken interface (e.g. the optlang
+            # hybrid_interface clone bug at .solver=) doesn't pollute the next solver's state.
+            ref = cobra.io.read_sbml_model(str(args.work / "raven_refModel.xml"))
+            ref.solver = solver
+            local_prep = pickle.loads((args.work / "rg_prep.pkl").read_bytes())
+            local_prep.min_model.solver = solver
+            g = gene_scores_from_expression(expr, 1.0)
+            r = score_reactions_from_genes(ref, g)
+            model = ftinit(local_prep, r, gene_scores=g, series="1+1",
+                           mip_gap=args.mip_gap, time_limit=args.time_limit)
+            rset = sorted(x.id for x in model.reactions)
+            res = Result(solver, time.time() - t, "ok", len(rset), rset)
+        except Exception as ex:  # noqa: BLE001 - failure mode is the finding
+            res = Result(solver, time.time() - t,
+                         f"FAIL:{type(ex).__name__}: {str(ex)[:80]}", 0, [])
+        store[solver] = res
+        save()
+        return res
+
+    results: dict[str, Result] = {s: run(s) for s in solvers}
+
+    # Reporting.
+    lines = [f"# Cross-solver ftINIT benchmark — Human-GEM / {args.cell}", "",
+             f"Same `ftinit()` call (no-task scaled prep; `mip_gap={args.mip_gap}`, "
+             f"`time_limit={args.time_limit}s`) run with each installed MILP-capable "
+             f"optlang interface. Generated by `scripts/analyze_init_solvers.py`.", "",
+             "## Per-solver result", "",
+             "| solver | time (s) | status | n_rxns |",
+             "|--------|---------:|--------|-------:|"]
+    for s, r in results.items():
+        lines.append(f"| {s} | {r.seconds:.0f} | {r.status} | {r.n_rxns} |")
+    lines.append("")
+
+    ok = {s: r for s, r in results.items() if r.status == "ok" and r.reactions}
+    if len(ok) >= 2:
+        lines += ["## Reaction-set agreement (Jaccard)", "",
+                  "| solvers | shared | only A | only B | Jaccard |",
+                  "|---------|-------:|-------:|-------:|--------:|"]
+        names = sorted(ok)
+        for i, a in enumerate(names):
+            for b in names[i + 1:]:
+                sa, sb = set(ok[a].reactions), set(ok[b].reactions)
+                lines.append(f"| {a} vs {b} | {len(sa & sb)} | {len(sa - sb)} | "
+                             f"{len(sb - sa)} | {_jaccard(sa, sb):.3f} |")
+        lines.append("")
+
+    text = "\n".join(lines) + "\n"
+    print(text)
+    if args.doc:
+        args.doc.write_text(text)
+        print(f"wrote {args.doc}", flush=True)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/benchmark_localization_yeast.py b/scripts/benchmark_localization_yeast.py
new file mode 100644
index 0000000..31c2076
--- /dev/null
+++ b/scripts/benchmark_localization_yeast.py
@@ -0,0 +1,297 @@
+#!/usr/bin/env python3
+"""Benchmark :func:`raven_python.localization.predict_localization` on yeast-GEM.
+
+Treats yeast-GEM's curated compartmentalisation as ground truth, flattens the model with
+:func:`merge_compartments` to a single compartment (so the algorithm cannot lean on
+metabolite-topology evidence), then asks ``predict_localization`` to place every
+GPR-annotated reaction back into a compartment given a per-gene score table.
+
+The reference score table is derived directly from yeast-GEM (each gene scores 1.0 in
+the compartments where its reactions actually live). Noise can be added — a configurable
+fraction of genes have a random other compartment swapped in as the best score — to see
+how the algorithm degrades with imperfect predictor evidence. With ``--scores-csv`` the
+reference table is replaced by a real predictor output (WoLF PSORT / DeepLoc / hand-built
+``gene_id × compartment`` CSV).
+
+Outputs a per-noise-level accuracy summary and, optionally, a markdown table to a doc.
+
+Usage
+-----
+    python scripts/benchmark_localization_yeast.py \\
+        --yeast-gem ~/github/pcSecYeastSpecies/Model/yeastGEM.xml \\
+        --noise 0,0.1,0.25,0.5 \\
+        --doc /tmp/yeast_localization_benchmark.md
+"""
+from __future__ import annotations
+
+import argparse
+import time
+from pathlib import Path
+
+import cobra
+import numpy as np
+import pandas as pd
+
+from raven_python.localization import LocalizationScores, predict_localization
+from raven_python.manipulation.compartments import merge_compartments
+
+# --------------------------------------------------------------------------- inputs
+
+def build_truth(model: cobra.Model) -> dict[str, str]:
+    """Map each benchmarkable reaction to its curated compartment, ``{rxn_id: compartment}``.
+
+    A reaction qualifies when it is not a boundary reaction, has at least one gene, and all
+    of its compartment-tagged metabolites share one compartment; transports spanning several
+    compartments therefore drop out, as the algorithm cannot place them.
+    """
+    truth: dict[str, str] = {}
+    for rxn in model.reactions:
+        if not rxn.boundary and rxn.genes:
+            # Metabolites without a compartment tag are ignored when counting compartments.
+            homes = {met.compartment for met in rxn.metabolites if met.compartment}
+            if len(homes) == 1:
+                (truth[rxn.id],) = homes
+    return truth
+
+
+def derive_scores_from_model(model: cobra.Model) -> LocalizationScores:
+    """Build the reference score table straight from the curated model.
+
+    A gene gets 1.0 for every compartment touched by a metabolite of any of its reactions
+    and 0.0 elsewhere, so a gene curated into several compartments has several top scores
+    (the case ``multi_compartment_penalty`` is meant for). Genes whose reactions touch no
+    compartment at all get no row.
+    """
+    rows: dict[str, dict[str, float]] = {}
+    for gene in model.genes:
+        touched: set[str] = set()
+        for rxn in gene.reactions:
+            touched.update(met.compartment for met in rxn.metabolites if met.compartment)
+        if touched:
+            rows[gene.id] = dict.fromkeys(touched, 1.0)
+    # Compartments a gene never touches come out of ``from_dict`` as NaN; score them 0.
+    table = pd.DataFrame.from_dict(rows, orient="index").fillna(0.0)
+    table.index.name = "gene_id"
+    return LocalizationScores(table)
+
+
+def add_noise(scores: LocalizationScores, fraction: float, seed: int) -> LocalizationScores:
+    """For ``fraction`` of genes, replace their score row with a single 1.0 in a random
+    *wrong* compartment (everything else 0). Simulates "predictor is confidently wrong".
+    Every compartment tied for a gene's top score counts as right, so none of them is picked.
+    """
+    if fraction <= 0:
+        return scores
+    rng = np.random.default_rng(seed)
+    df = scores.df.copy()
+    compartments = list(df.columns)
+    targets = rng.choice(df.index, size=int(round(fraction * len(df))), replace=False)
+    for g in targets:
+        row = df.loc[g]
+        tops = set(row[row == row.max()].index) if row.max() > 0 else {compartments[0]}  # all-zero row
+        candidates = [c for c in compartments if c not in tops]
+        if candidates:  # a gene that is top-scored everywhere has no wrong compartment
+            df.loc[g, :] = 0.0
+            df.at[g, rng.choice(candidates)] = 1.0
+    return LocalizationScores(df)
+
+
+def load_csv_scores(path: Path) -> LocalizationScores:
+    """Read a ``gene_id × compartment`` score CSV whose first column holds the gene ids."""
+    table = pd.read_csv(path, index_col=0)
+    table.index.name = "gene_id"
+    # Cells that are not numeric are coerced to NaN and then scored 0, like a missing value.
+    return LocalizationScores(table.apply(pd.to_numeric, errors="coerce").fillna(0.0))
+
+
+# --------------------------------------------------------------------------- benchmark
+
+def run_one_test(
+    model_orig: cobra.Model,
+    truth: dict[str, str],
+    scores: LocalizationScores,
+    *,
+    default_compartment: str,
+    transport_cost: float,
+    multi_compartment_penalty: float,
+    mip_gap: float | None,
+    time_limit: float | None,
+) -> dict:
+    """Flatten, re-localise, and score one configuration (a single MILP solve).
+
+    ``model_orig`` is merged into one compartment named ``default_compartment`` (so a
+    reaction curated into the default counts as correct when left unmoved), then
+    ``predict_localization`` is asked to place every surviving truth-set reaction.
+    Returns the timing, the accuracy counts, and the per-reaction predictions/truth.
+    """
+    flat, _, _ = merge_compartments(
+        model_orig, merged_id=default_compartment, merged_name=default_compartment,
+        drop_single_metabolite_reactions=False, deduplicate_reactions=False,
+    )
+    # Merging can cancel a reaction's net stoichiometry and drop it, so only reactions
+    # still present in the flattened model are scored.
+    flat_ids = {rxn.id for rxn in flat.reactions}
+    scored = {rid: comp for rid, comp in truth.items() if rid in flat_ids}
+
+    started = time.time()
+    proposal = predict_localization(
+        flat, scores, list(scored),
+        default_compartment=default_compartment,
+        transport_cost=transport_cost,
+        multi_compartment_penalty=multi_compartment_penalty,
+        apply=False, mip_gap=mip_gap, time_limit=time_limit,
+    )
+    elapsed = time.time() - started
+
+    # ``proposal.moved`` lists only the reactions whose chosen compartment differs from
+    # the flattened ``from_compartment`` (i.e. ``default_compartment``); every scored
+    # reaction missing from it is recorded as placed in the default.
+    destinations = dict(zip(proposal.moved["rxn_id"], proposal.moved["to_compartment"], strict=True))
+    predictions = {rid: destinations.get(rid, default_compartment) for rid in scored}
+
+    n_correct = sum(predictions[rid] == comp for rid, comp in scored.items())
+    n_unplaced = len(set(proposal.unplaced_reactions) & set(scored))
+    return {
+        "seconds": elapsed,
+        "n_total": len(scored),
+        "n_correct": n_correct,
+        "n_unplaced": n_unplaced,
+        "accuracy": n_correct / len(scored) if scored else 0.0,
+        "predictions": predictions,
+        "truth": scored,
+    }
+
+
+def confusion_matrix(predictions: dict[str, str], truth: dict[str, str]) -> pd.DataFrame:
+    """Count ``(true, predicted)`` pairs: one row per true label, one column per predicted label."""
+    pairs = pd.DataFrame(
+        [(truth[rid], predicted) for rid, predicted in predictions.items()],
+        columns=["true", "predicted"],
+    )
+    counts = pairs.groupby(["true", "predicted"]).size().unstack(fill_value=0)
+    return counts.sort_index(axis=0).sort_index(axis=1)
+
+
+# --------------------------------------------------------------------------- main
+
+def main() -> None:
+    """CLI entry point: sweep the noise levels, then print (and optionally write) the report."""
+    ap = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+    ap.add_argument("--yeast-gem", type=Path,
+                    default=Path.home() / "github" / "pcSecYeastSpecies" / "Model" / "yeastGEM.xml")
+    ap.add_argument("--scores-csv", type=Path,
+                    help="optional gene_id × compartment CSV; defaults to from-model scores")
+    ap.add_argument("--noise", default="0,0.1,0.25,0.5",
+                    help="comma-separated noise fractions to sweep (ignored with --scores-csv)")
+    ap.add_argument("--default-compartment", default="c")
+    ap.add_argument("--transport-cost", type=float, default=0.5)
+    ap.add_argument("--multi-compartment-penalty", type=float, default=0.5)
+    ap.add_argument("--mip-gap", type=float, default=0.01)
+    ap.add_argument("--time-limit", type=float, default=900)
+    ap.add_argument("--seed", type=int, default=42)
+    ap.add_argument("--max-reactions", type=int, default=None,
+                    help="optionally subsample the truth set to N reactions (keeps the "
+                         "compartment distribution, drawn deterministically with --seed)")
+    ap.add_argument("--doc", type=Path, help="write a markdown summary here")
+    args = ap.parse_args()
+
+    print(f"loading {args.yeast_gem} ...", flush=True)
+    model = cobra.io.read_sbml_model(str(args.yeast_gem))
+    truth = build_truth(model)
+    print(f"yeast-GEM: {len(model.reactions)} reactions, {len(model.genes)} genes, "
+          f"{len(model.compartments)} compartments; truth set: {len(truth)} reactions",
+          flush=True)
+    if args.max_reactions and args.max_reactions < len(truth):
+        # Stratified subsample: keep the original compartment distribution.
+        rng = np.random.default_rng(args.seed)
+        by_comp: dict[str, list[str]] = {}
+        for rid, c in truth.items():
+            by_comp.setdefault(c, []).append(rid)
+        keep: list[str] = []
+        for rids in by_comp.values():
+            n = max(1, round(args.max_reactions * len(rids) / len(truth)))
+            keep += rng.choice(rids, size=min(n, len(rids)), replace=False).tolist()  # plain str ids
+        truth = {rid: truth[rid] for rid in keep}
+        print(f"subsampled truth set to {len(truth)} reactions "
+              f"(--max-reactions={args.max_reactions})", flush=True)
+
+    base_scores: LocalizationScores
+    if args.scores_csv:
+        print(f"loading scores from {args.scores_csv} ...", flush=True)
+        base_scores = load_csv_scores(args.scores_csv)
+        noise_levels = [0.0]  # external scores: no synthetic noise sweep
+    else:
+        print("deriving reference scores from yeast-GEM ...", flush=True)
+        base_scores = derive_scores_from_model(model)
+        noise_levels = [float(x) for x in args.noise.split(",")]
+
+    results: list[dict] = []
+    for noise in noise_levels:
+        scores = add_noise(base_scores, noise, args.seed) if noise > 0 else base_scores
+        print(f"\n=== noise={noise:.2f} ({int(round(noise * len(base_scores.df)))} genes "
+              f"confidently mis-scored) ===", flush=True)
+        r = run_one_test(
+            model, truth, scores,
+            default_compartment=args.default_compartment,
+            transport_cost=args.transport_cost,
+            multi_compartment_penalty=args.multi_compartment_penalty,
+            mip_gap=args.mip_gap, time_limit=args.time_limit,
+        )
+        r["noise"] = noise
+        results.append(r)
+        print(f"  solved in {r['seconds']:.0f}s — accuracy {r['n_correct']}/{r['n_total']} = "
+              f"{r['accuracy']:.3f} ({r['n_unplaced']} unplaced)", flush=True)
+
+    # --- Reporting -------------------------------------------------------------
+    lines: list[str] = []
+    lines += ["# yeast-GEM localisation benchmark", "",
+              f"Model: `{args.yeast_gem.name}` — {len(model.reactions)} reactions, "
+              f"{len(model.genes)} genes, {len(model.compartments)} compartments. "
+              f"Truth set: {len(truth)} single-compartment GPR-annotated reactions. "
+              f"Default compartment for the merged model: `{args.default_compartment}`. "
+              f"`transport_cost={args.transport_cost}`, "
+              f"`multi_compartment_penalty={args.multi_compartment_penalty}`, "
+              f"`mip_gap={args.mip_gap}`, `time_limit={args.time_limit}s`.", "",
+              "## Accuracy vs. predictor noise", "",
+              "| noise | seconds | n_total | n_correct | n_unplaced | accuracy |",
+              "|------:|--------:|--------:|----------:|-----------:|---------:|"]
+    for r in results:
+        lines.append(
+            f"| {r['noise']:.2f} | {r['seconds']:.0f} | {r['n_total']} | "
+            f"{r['n_correct']} | {r['n_unplaced']} | {r['accuracy']:.3f} |"
+        )
+    lines.append("")
+
+    # Confusion matrix for the lowest-noise run (typically the most informative).
+    best = min(results, key=lambda x: x["noise"])
+    cm = confusion_matrix(best["predictions"], best["truth"])
+    lines += [f"## Confusion matrix at noise={best['noise']:.2f}", "",
+              "Rows = curated (true) compartment; columns = predicted.", ""]
+    lines.append("| true \\ pred | " + " | ".join(str(c) for c in cm.columns) + " |")
+    lines.append("|---" + "|---" * len(cm.columns) + "|")
+    for true_c, row in cm.iterrows():
+        lines.append(f"| **{true_c}** | " + " | ".join(str(int(v)) for v in row) + " |")
+    lines.append("")
+
+    # Per-compartment accuracy at the lowest-noise run.
+    per_comp: dict[str, tuple[int, int]] = {}
+    for rid, true_c in best["truth"].items():
+        n_true, n_correct = per_comp.get(true_c, (0, 0))
+        per_comp[true_c] = (n_true + 1, n_correct + (best["predictions"][rid] == true_c))
+    lines += [f"## Per-compartment accuracy at noise={best['noise']:.2f}", "",
+              "| compartment | n | n_correct | accuracy |",
+              "|---|--:|--:|--:|"]
+    for c in sorted(per_comp):
+        n, ok = per_comp[c]
+        lines.append(f"| {c} | {n} | {ok} | {ok / n:.3f} |")
+    lines.append("")
+
+    text = "\n".join(lines) + "\n"
+    print("\n" + text)
+    if args.doc:
+        args.doc.write_text(text)
+        print(f"wrote {args.doc}", flush=True)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/build_kegg_artefacts.py b/scripts/build_kegg_artefacts.py
new file mode 100644
index 0000000..13fd00e
--- /dev/null
+++ b/scripts/build_kegg_artefacts.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python
+"""Build the publishable KEGG artefact set for one release (maintainer-side).
+
+Runs the maintainer pipeline against an arranged KEGG dump (see
+``download_kegg_dump`` / ``fetch_keggdb``):
+
+* 3b.2 — ``parse_kegg_dump`` → ``reference_model.yml.gz`` + the gzipped-TSV tables;
+* 3b.3 — ``build_hmm_library`` per domain → a pressed ``<domain>.hmm`` (+ hmmpress
+  sidecars), named so :func:`raven_python.data.ensure_kegg_hmm_library` can fetch them.
+
+Everything lands in ``--out`` ready to upload as release assets; feed that
+directory to ``scripts/make_registry_snippet.py data`` to emit the registry entry.
+
+Examples
+--------
+Tables + reference model only (fast, no binaries)::
+
+    python scripts/build_kegg_artefacts.py --keggdb keggdb --out artefacts
+
+Full build incl. both HMM libraries (slow; needs HMMER/MAFFT/CD-HIT)::
+
+    python scripts/build_kegg_artefacts.py --keggdb keggdb --out artefacts \\
+        --hmms --threads 8
+"""
+from __future__ import annotations
+
+import argparse
+import shutil
+from pathlib import Path
+
+from raven_python.reconstruction.kegg import (
+    build_hmm_library,
+    parse_kegg_dump,
+    read_kegg_table,
+)
+
+# hmmpress sidecar extensions, alongside the .hmm.
+_HMM_SIDECARS = (".h3f", ".h3i", ".h3m", ".h3p")
+
+
+def _publish_library(work: dict, out_dir: Path, domain: str) -> Path:
+    """Copy a built ``library.hmm`` (+ sidecars) to ``out_dir/<domain>.hmm``."""
+    library = work["library"]
+    if library is None:
+        raise SystemExit(f"No HMMs built for {domain!r}; nothing to publish.")
+    target = out_dir / f"{domain}.hmm"
+    shutil.copyfile(library, target)
+    for suffix in _HMM_SIDECARS:
+        sidecar = library.with_name(library.name + suffix)
+        if sidecar.exists():
+            shutil.copyfile(sidecar, target.with_name(target.name + suffix))
+    return target
+
+
+def main(argv: list[str] | None = None) -> None:
+    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser.add_argument("--keggdb", required=True, type=Path, help="arranged KEGG dump directory")
+    parser.add_argument("--out", required=True, type=Path, help="artefact output directory")
+    parser.add_argument("--hmms", action="store_true", help="also build the HMM libraries")
+    parser.add_argument(
+        "--domains", nargs="+", default=["prokaryotes", "eukaryotes"], help="HMM domains to build"
+    )
+    parser.add_argument("--threads", type=int, default=1)
+    parser.add_argument("--seq-identity", type=float, default=0.9, help="CD-HIT identity (-1 skips)")
+    parser.add_argument(
+        "--parttree-residues", type=int, default=None,
+        help="total-residue budget above which MAFFT uses PartTree (default 1M, tuned "
+             "for ~7 GB RAM; raise on machines with more memory)",
+    )
+    args = parser.parse_args(argv)
+
+    args.out.mkdir(parents=True, exist_ok=True)
+    print(">>> Parsing KEGG dump (3b.2)...")
+    paths = parse_kegg_dump(args.keggdb, args.out)
+    for name, path in paths.items():
+        print(f"    {name}: {path}")
+
+    if args.hmms:
+        ogk = read_kegg_table(paths["organism_gene_ko"])
+        genes_pep = args.keggdb / "genes.pep"
+        taxonomy = args.keggdb / "taxonomy"
+        for domain in args.domains:
+            print(f">>> Building HMM library for {domain} (3b.3)...")
+            work = build_hmm_library(
+                ogk, genes_pep, taxonomy, args.out / f"_hmms-{domain}",
+                domain=domain, seq_identity=args.seq_identity,
+                parttree_residues=args.parttree_residues, threads=args.threads,
+            )
+            published = _publish_library(work, args.out, domain)
+            print(f"    {domain}: {published} ({len(work['hmms'])} profiles)")
+
+    print(f"\n>>> Done. Upload the contents of {args.out} as release assets, then run:")
+    print("    python scripts/make_registry_snippet.py data --dataset kegg "
+          f"--version <VER> --dir {args.out} --base-url <RELEASE_URL>")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/make_registry_snippet.py b/scripts/make_registry_snippet.py
new file mode 100644
index 0000000..3efa49e
--- /dev/null
+++ b/scripts/make_registry_snippet.py
@@ -0,0 +1,102 @@
+#!/usr/bin/env python
+"""Emit ready-to-paste registry entries for published artefacts / binary ZIPs.
+
+Computes the SHA256 of each file and prints the Python/JSON entry to merge into
+``raven_python.data._DATA_REGISTRY`` (data artefacts) or ``raven_python.binaries._REGISTRY``
+(binary bundles). Run once per release, after uploading the files to the release.
+
+Examples
+--------
+Data artefacts (KEGG reference model + tables + HMM libraries) for one release::
+
+    python scripts/make_registry_snippet.py data \\
+        --dataset kegg --version kegg116 --dir artefacts \\
+        --base-url https://github.com/ORG/raven_python/releases/download/kegg-data-kegg116
+
+Binary bundle (one ZIP per platform, named ``<bundle>-<version>-<os>-<arch>.zip``)::
+
+    python scripts/make_registry_snippet.py binary \\
+        --bundle blast --version 2.16.0 --provides blastp makeblastdb --dir zips \\
+        --base-url https://github.com/ORG/raven_python/releases/download/blast-2.16.0
+
+The SHA256 helper is shared with the runtime resolvers (``raven_python.binaries``), so
+published checksums always match what ``ensure_data`` / ``ensure_binary`` verify.
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from pathlib import Path
+
+from raven_python.binaries import _sha256
+
+
+def _files_in(directory: Path) -> list[Path]:
+    """Regular, non-hidden files in ``directory``, sorted by name."""
+    return sorted(p for p in directory.iterdir() if p.is_file() and not p.name.startswith("."))
+
+
+def data_entry(dataset: str, version: str, base_url: str, directory: Path) -> dict:
+    """Build the ``_DATA_REGISTRY[dataset]`` entry for every file in ``directory``."""
+    base = base_url.rstrip("/")
+    files = {
+        p.name: {"url": f"{base}/{p.name}", "sha256": _sha256(p)} for p in _files_in(directory)
+    }
+    if not files:
+        raise SystemExit(f"No files found in {directory}")
+    return {"version": version, "files": files}
+
+
+def binary_entry(
+    bundle: str, version: str, provides: list[str], base_url: str, directory: Path
+) -> dict:
+    """Build the ``_REGISTRY[bundle]`` entry from ``<bundle>-<version>-<os>-<arch>.zip``."""
+    base = base_url.rstrip("/")
+    prefix = f"{bundle}-{version}-"
+    platforms = {}
+    for zip_path in directory.glob(f"{prefix}*.zip"):
+        platform = zip_path.name[len(prefix) : -len(".zip")]
+        platforms[platform] = {"url": f"{base}/{zip_path.name}", "sha256": _sha256(zip_path)}
+    if not platforms:
+        raise SystemExit(f"No {prefix}*.zip files found in {directory}")
+    return {"version": version, "provides": provides, "platforms": dict(sorted(platforms.items()))}
+
+
+def render(key: str, entry: dict) -> str:
+    """Render ``{key: entry}`` as an indented JSON block (valid Python to paste)."""
+    return json.dumps({key: entry}, indent=4)
+
+
+def main(argv: list[str] | None = None) -> None:
+    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+    sub = parser.add_subparsers(dest="kind", required=True)
+
+    d = sub.add_parser("data", help="data-artefact registry entry (raven_python.data)")
+    d.add_argument("--dataset", required=True, help="dataset key, e.g. 'kegg'")
+    d.add_argument("--version", required=True)
+    d.add_argument("--dir", required=True, type=Path, help="directory of uploaded artefacts")
+    d.add_argument("--base-url", required=True, help="release download URL prefix")
+
+    b = sub.add_parser("binary", help="binary-bundle registry entry (raven_python.binaries)")
+    b.add_argument("--bundle", required=True, help="bundle key, e.g. 'blast'")
+    b.add_argument("--version", required=True)
+    b.add_argument("--provides", nargs="+", required=True, help="executables the bundle provides")
+    b.add_argument("--dir", required=True, type=Path, help="directory of uploaded ZIPs")
+    b.add_argument("--base-url", required=True, help="release download URL prefix")
+
+    args = parser.parse_args(argv)
+    if args.kind == "data":
+        key, entry = args.dataset, data_entry(args.dataset, args.version, args.base_url, args.dir)
+        target = "raven_python/data.py  _DATA_REGISTRY"
+    else:
+        key = args.bundle
+        entry = binary_entry(args.bundle, args.version, args.provides, args.base_url, args.dir)
+        target = "raven_python/binaries.py  _REGISTRY"
+
+    print(f"# Merge into {target}:", file=sys.stderr)
+    print(render(key, entry))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/raven_python/__init__.py b/src/raven_python/__init__.py
new file mode 100644
index 0000000..591c4c1
--- /dev/null
+++ b/src/raven_python/__init__.py
@@ -0,0 +1,10 @@
+"""raven_python — Python counterpart of the RAVEN Toolbox, built on cobrapy.
+
+raven_python reuses cobrapy for simulation, standard analyses, SBML I/O, and model
+manipulation, and provides the RAVEN-specific functionality on top: de novo
+reconstruction (KEGG / homology), context-specific modeling (tINIT / ftINIT),
+metabolic task validation, connectivity gap-filling, omics integration (HPA),
+sub-cellular localisation, N-model comparison, and the RAVEN-style I/O formats.
+"""
+
+__version__ = "0.0.1"
diff --git a/src/raven_python/analysis/__init__.py b/src/raven_python/analysis/__init__.py
new file mode 100644
index 0000000..d85afef
--- /dev/null
+++ b/src/raven_python/analysis/__init__.py
@@ -0,0 +1,23 @@
+"""Analyses not in cobrapy's core.
+
+* :func:`reporter_metabolites` — Reporter Metabolites (around-metabolite gene-score test).
+* :func:`fseof` — Flux Scanning based on Enforced Objective Flux.
+* :func:`random_sampling` — random-objective flux sampling.
+"""
+from raven_python.analysis.fseof import FSEOFResult, fseof
+from raven_python.analysis.reporter import ReporterResult, reporter_metabolites
+from raven_python.analysis.sampling import (
+    RandomSamplingResult,
+    find_good_reactions,
+    random_sampling,
+)
+
+__all__ = [
+    "FSEOFResult",
+    "RandomSamplingResult",
+    "ReporterResult",
+    "find_good_reactions",
+    "fseof",
+    "random_sampling",
+    "reporter_metabolites",
+]
diff --git a/src/raven_python/analysis/fseof.py b/src/raven_python/analysis/fseof.py
new file mode 100644
index 0000000..c5b3ee2
--- /dev/null
+++ b/src/raven_python/analysis/fseof.py
@@ -0,0 +1,161 @@
+"""Flux Scanning based on Enforced Objective Flux — FSEOF (port + redesign).
+
+FSEOF (Choi et al., Appl Environ Microbiol 2010) finds metabolic-engineering targets
+for over-producing a metabolite: enforce an increasing flux toward the target product
+while optimising growth, and watch how each reaction's flux responds. This is a port
+of RAVEN's ``FSEOF`` with a substantially richer, more robust output (RAVEN's
+weaknesses are noted in IMPROVEMENTS, FS1–FS4):
+
+* **Robust trend, not strict monotonicity.** Each reaction's flux is regressed against
+  the enforced product flux across the scan; the **slope** is the response and the
+  **correlation** (|r|) is a quality score. A reaction is a target if it tracks the
+  product cleanly (|r| ≥ ``correlation_threshold``) — one noisy step from LP
+  alternative optima no longer discards it (and pFBA per step keeps the scan stable).
+* **Direction classification RAVEN lacks.** Targets are labelled ``amplify`` (|flux|
+  rises with the product → over-express), ``knockdown`` (|flux| falls), or ``knockout``
+  (|flux| → ~0 → delete). RAVEN only ever reports the amplification targets.
+* **Gene-level view** via :attr:`FSEOFResult.gene_targets`, and the full flux scan is
+  retained in :attr:`FSEOFResult.scan` — all as DataFrames, not a printed TSV.
+"""
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+import cobra
+import numpy as np
+import pandas as pd
+from cobra.exceptions import OptimizationError
+from cobra.flux_analysis import pfba
+from scipy.stats import linregress
+
+
+@dataclass
+class FSEOFResult:
+    """FSEOF output.
+
+    ``scan`` is reactions × enforced-flux-levels (the full flux scan); ``enforced`` are
+    the enforced target fluxes; ``targets`` is the classified per-reaction table
+    (sorted by score). :attr:`gene_targets` aggregates targets to genes.
+    """
+
+    scan: pd.DataFrame
+    enforced: list[float]
+    targets: pd.DataFrame
+
+    @property
+    def amplification(self) -> pd.DataFrame:
+        return self.targets[self.targets["target_type"] == "amplify"].reset_index(drop=True)
+
+    @property
+    def knockout(self) -> pd.DataFrame:
+        mask = self.targets["target_type"].isin(["knockout", "knockdown"])
+        return self.targets[mask].reset_index(drop=True)
+
+    @property
+    def gene_targets(self) -> pd.DataFrame:
+        """Per-gene aggregation: the target reactions each gene is associated with."""
+        rows = []
+        for _, t in self.targets.iterrows():
+            for gene in t["genes"]:
+                rows.append({"gene": gene, "reaction": t["reaction"],
+                             "target_type": t["target_type"], "slope": t["slope"]})
+        if not rows:
+            return pd.DataFrame(columns=["gene", "target_type", "reactions", "max_abs_slope"])
+        df = pd.DataFrame(rows)
+        agg = df.groupby("gene").agg(
+            target_type=("target_type", lambda s: ";".join(sorted(set(s)))),
+            reactions=("reaction", lambda s: ";".join(sorted(set(s)))),
+            max_abs_slope=("slope", lambda s: float(np.max(np.abs(s)))),
+        ).reset_index()
+        return agg.sort_values("max_abs_slope", ascending=False, ignore_index=True)
+
+
+def fseof(
+    model: cobra.Model,
+    target_rxn: str,
+    *,
+    biomass_rxn: str | None = None,
+    n_steps: int = 10,
+    max_fraction: float = 0.9,
+    correlation_threshold: float = 0.9,
+    flux_eps: float = 1e-6,
+) -> FSEOFResult:
+    """Run FSEOF for over-production of ``target_rxn``'s product.
+
+    Enforces target flux from ``max_fraction/n_steps`` up to ``max_fraction`` of the
+    theoretical maximum in ``n_steps`` steps, maximising growth (``biomass_rxn`` or the
+    model's current objective) with pFBA at each step. Returns an :class:`FSEOFResult`.
+    """
+    with model:  # find the theoretical maximum target flux
+        model.objective = target_rxn
+        target_opt = model.slim_optimize()
+    # slim_optimize returns NaN on an infeasible model; np.isfinite catches that too.
+    if target_opt is None or not np.isfinite(target_opt) or target_opt <= flux_eps:
+        raise ValueError(f"{target_rxn!r} cannot carry positive flux; nothing to scan.")
+    target_max = target_opt * max_fraction
+    levels = [target_max * (i + 1) / n_steps for i in range(n_steps)]
+
+    columns: dict[float, pd.Series] = {}
+    enforced: list[float] = []
+    for level in levels:
+        with model:
+            if biomass_rxn is not None:
+                model.objective = biomass_rxn
+            model.reactions.get_by_id(target_rxn).lower_bound = level
+            try:
+                columns[level] = pfba(model).fluxes
+            except OptimizationError:
+                break  # enforced flux became infeasible — stop scanning
+            enforced.append(level)
+    if len(enforced) < 2:
+        raise RuntimeError("FSEOF needs at least two feasible enforced-flux levels.")
+
+    scan = pd.DataFrame(columns)
+    targets = _classify(model, scan, np.asarray(enforced), correlation_threshold, flux_eps)
+    return FSEOFResult(scan=scan, enforced=enforced, targets=targets)
+
+
+def _classify(model, scan, enforced, corr_threshold, flux_eps) -> pd.DataFrame:
+    rows = []
+    for rxn in model.reactions:
+        flux = scan.loc[rxn.id, enforced.tolist() if hasattr(enforced, "tolist") else enforced]
+        flux = flux.to_numpy(dtype=float)
+        initial, final = flux[0], flux[-1]
+        if flux.std() < flux_eps:  # flat -> no response
+            continue
+        fit = linregress(enforced, flux)
+        slope, corr = float(fit.slope), float(fit.rvalue)
+        if abs(corr) < corr_threshold or abs(slope) < flux_eps:
+            continue
+        # Classify on the slope of |flux| vs the enforced product flux — the
+        # criterion the docstring states (|flux| rises = amplify, etc.). The
+        # old endpoint-only check (``abs(final) vs abs(initial)``) could
+        # mislabel a track whose first/last values straddled a peak/trough but
+        # whose overall trend was the opposite. Keep ``knockout`` for tracks
+        # the regression drives essentially to zero.
+        abs_fit = linregress(enforced, np.abs(flux))
+        abs_slope = float(abs_fit.slope)
+        if abs(final) < flux_eps and abs_slope < 0:
+            ttype = "knockout"
+        elif abs_slope > 0:
+            ttype = "amplify"
+        else:
+            ttype = "knockdown"
+        rows.append({
+            "reaction": rxn.id,
+            "name": rxn.name,
+            "subsystem": rxn.subsystem,
+            "gene_reaction_rule": rxn.gene_reaction_rule,
+            "genes": sorted(g.id for g in rxn.genes),
+            "target_type": ttype,
+            "slope": slope,
+            "correlation": corr,
+            "initial_flux": initial,
+            "final_flux": final,
+            "score": abs(slope) * abs(corr),
+        })
+    table = pd.DataFrame(rows, columns=[
+        "reaction", "name", "subsystem", "gene_reaction_rule", "genes",
+        "target_type", "slope", "correlation", "initial_flux", "final_flux", "score",
+    ])
+    return table.sort_values("score", ascending=False, ignore_index=True)
diff --git a/src/raven_python/analysis/reporter.py b/src/raven_python/analysis/reporter.py
new file mode 100644
index 0000000..5d96d47
--- /dev/null
+++ b/src/raven_python/analysis/reporter.py
@@ -0,0 +1,117 @@
+"""Reporter Metabolites — metabolites around which transcriptional change concentrates.
+
+Patil & Nielsen, PNAS 2005. Each gene's differential-expression p-value becomes a
+Z-score ``z = -Φ⁻¹(p)``; for every metabolite the Z-scores of the genes on its
+neighbouring reactions are aggregated (``Σz / √n``), background-corrected, and turned
+back into a p-value.
+
+The background correction has an exact closed form (sampling with replacement from the
+scored-gene pool: a random ``Σz/√n`` has mean ``√n·μ`` and standard deviation ``σ``
+with μ, σ the mean/std of the scored Z-scores), so the corrected score is just
+``(metZ − √n·μ) / σ`` — no Monte-Carlo sampling needed.
+"""
+from __future__ import annotations
+
+import math
+from collections.abc import Mapping
+from dataclasses import dataclass
+
+import cobra
+import numpy as np
+import pandas as pd
+from scipy.stats import norm
+
+_CLAMP = 15.0  # |Z| cap for p-values of exactly 0 or 1 (RAVEN's ±15)
+
+
+@dataclass
+class ReporterResult:
+    """Reporter-metabolite scores for one gene set.
+
+    ``test`` is ``"all"``, ``"up"`` or ``"down"``; ``table`` is a DataFrame with
+    columns ``metabolite, name, z_score, p_value, n_genes, mean_z, std_z`` sorted by
+    descending ``z_score``.
+    """
+
+    test: str  # label of the gene subset that was scored
+    table: pd.DataFrame  # one row per metabolite that has at least one scored neighbouring gene
+
+
+def _gene_z(pvalues: dict[str, float]) -> dict[str, float]:
+    genes = list(pvalues)
+    z = -norm.ppf([pvalues[g] for g in genes])
+    z = np.where(np.isposinf(z), _CLAMP, z)
+    z = np.where(np.isneginf(z), -_CLAMP, z)
+    return dict(zip(genes, z, strict=True))
+
+
+def _reporter_one(model: cobra.Model, gene_z: dict[str, float], test: str) -> ReporterResult:
+    z_values = np.fromiter(gene_z.values(), dtype=float)
+    mu = float(z_values.mean()) if z_values.size else 0.0
+    sigma = float(z_values.std(ddof=0)) if z_values.size else 0.0
+
+    rows = []
+    for met in model.metabolites:
+        neighbours = {g.id for rxn in met.reactions for g in rxn.genes if g.id in gene_z}
+        if not neighbours:
+            continue
+        zs = np.array([gene_z[g] for g in neighbours])
+        n = zs.size
+        raw = zs.sum() / math.sqrt(n)
+        # Exact background correction for sampling-with-replacement (see module doc).
+        corrected = (raw - math.sqrt(n) * mu) / sigma if sigma > 0 else 0.0
+        rows.append(
+            {
+                "metabolite": met.id,
+                "name": met.name or met.id,
+                "z_score": corrected,
+                "p_value": float(1.0 - norm.cdf(corrected)),
+                "n_genes": n,
+                "mean_z": float(zs.mean()),
+                "std_z": float(zs.std(ddof=1)) if n > 1 else float("nan"),
+            }
+        )
+    table = pd.DataFrame(rows, columns=["metabolite", "name", "z_score", "p_value", "n_genes", "mean_z", "std_z"])
+    table = table.sort_values("z_score", ascending=False, ignore_index=True)
+    return ReporterResult(test, table)
+
+
+def reporter_metabolites(
+    model: cobra.Model,
+    gene_pvalues: Mapping[str, float],
+    *,
+    gene_fold_changes: Mapping[str, float] | None = None,
+) -> list[ReporterResult]:
+    """Compute Reporter Metabolites from per-gene differential-expression p-values.
+
+    ``gene_pvalues`` maps gene id → p-value (genes not in the model, or with a NaN or
+    out-of-``[0, 1]`` p-value, are dropped — a stray invalid p-value would otherwise
+    turn the whole result NaN). If ``gene_fold_changes`` (gene id → log fold change)
+    is given, two extra results are returned for the up- (fc ≥ 0) and down- (fc < 0)
+    regulated gene subsets, in addition to ``"all"``.
+
+    Parity with RAVEN's ``reporterMetabolites``: the ``z_score`` and underlying
+    background correction match exactly (exact closed-form instead of RAVEN's
+    Monte-Carlo, see IMPROVEMENTS RM1). The reported ``p_value`` is the
+    *one-sided* (``"up"``) enrichment ``1 - Φ(z)`` and the result is sorted by
+    ``z_score`` descending. RAVEN sorts by p-value and reports both tails
+    (``allPValues``, ``allUpPValues``, ``allDownPValues``); the up/down splits
+    here come from the ``gene_fold_changes`` subset partition instead, so the
+    same information is available via the three returned ``ReporterResult``
+    rows.
+    """
+    model_genes = {g.id for g in model.genes}
+    scored = {
+        g: float(p)
+        for g, p in gene_pvalues.items()
+        if g in model_genes and p is not None and not math.isnan(p) and 0.0 <= p <= 1.0
+    }
+    gene_z = _gene_z(scored)
+    results = [_reporter_one(model, gene_z, "all")]
+
+    if gene_fold_changes is not None:
+        up = {g: z for g, z in gene_z.items() if gene_fold_changes.get(g, 0.0) >= 0}
+        down = {g: z for g, z in gene_z.items() if gene_fold_changes.get(g, 0.0) < 0}
+        results.append(_reporter_one(model, up, "up"))
+        results.append(_reporter_one(model, down, "down"))
+    return results
diff --git a/src/raven_python/analysis/sampling.py b/src/raven_python/analysis/sampling.py
new file mode 100644
index 0000000..429b164
--- /dev/null
+++ b/src/raven_python/analysis/sampling.py
@@ -0,0 +1,207 @@
+"""Random-objective flux sampling — RAVEN's ``randomSampling`` (port + improvements).
+
+Samples the flux solution space by the **random-objective** method of Bordel et al.
+(2010, PLoS Comput Biol, doi:10.1371/journal.pcbi.1000859), as ported from RAVEN's
+``randomSampling``: each sample maximises a small random linear combination of
+reactions, so every sample is an *extreme point* (vertex) of the flux polytope.
+
+This is a different statistical object from cobrapy's ``cobra.sampling`` (OptGP /
+ACHR), which draw a (near-)uniform Markov-chain sample of the polytope *interior*.
+Use cobra's samplers when you need the uniform flux distribution; use this when you
+want a fast, robust spread of diverse optimal states — the workflow RAVEN uses to
+compare conditions, and one that stays well-behaved on large or tightly-constrained
+models where MCMC mixing is poor. cobrapy has no equivalent, so this is a genuine
+addition, not a wrapper.
+
+Improvements over RAVEN (see IMPROVEMENTS SAMP1):
+
+* **`good_reactions` via one FVA pass**, not a hand-rolled per-reaction ``parfor``
+  loop. A reaction is usable as a random objective if it can carry flux and is not
+  stuck in a stoichiometrically-infeasible loop (its range blows past the arbitrary
+  large bound). ``cobra``'s FVA computes exactly that, faster and in far less code,
+  and can optionally be made ``loopless``.
+* **Reproducible** via ``seed`` (RAVEN has no seed control).
+* **`n_objectives` is a parameter** (RAVEN hard-codes 2, though its docstring claims
+  3).
+* **Tidy output**: a ``samples`` DataFrame shaped samples × reactions (matching
+  ``cobra.sampling``), plus the reusable ``good_reactions`` list — instead of a
+  reactions × samples matrix and a parallel index vector.
+"""
+from __future__ import annotations
+
+import logging
+from collections.abc import Iterable
+from dataclasses import dataclass
+
+import cobra
+import numpy as np
+import pandas as pd
+from cobra.flux_analysis import flux_variability_analysis, pfba
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class RandomSamplingResult:
+    """Output of :func:`random_sampling`.
+
+    ``samples`` is a DataFrame of flux vectors shaped *n_samples × n_reactions*
+    (one sample per row, reaction ids as columns — the ``cobra.sampling`` layout).
+    ``good_reactions`` is the list of reaction ids that were eligible as random
+    objectives; pass it back in to skip the (one-off) FVA on a repeat run.
+    """
+
+    samples: pd.DataFrame  # columns follow the order of ``model.reactions``
+    good_reactions: list[str]  # the caller-supplied ids, or those found by find_good_reactions
+
+
+def find_good_reactions(
+    model: cobra.Model,
+    *,
+    flux_tol: float = 1e-9,
+    loopless: bool = True,
+    exclude_reactions: Iterable[str] | None = None,
+) -> list[str]:
+    """Reactions usable as random objectives: carry real (non-loop) flux.
+
+    A reaction is kept if its FVA range spans more than ``flux_tol``. With
+    ``loopless`` (default) the FVA is loopless (``cycleFreeFlux``), so reactions
+    that can carry flux *only* through a thermodynamically-infeasible cycle have a
+    ~0 loopless range and are dropped — the right test for "loopy", unlike a fixed
+    bound threshold which wrongly drops legitimate reactions that simply reach the
+    model's default (e.g. 1000) bound. Pass ``loopless=False`` for a faster, looser
+    pass that keeps any flux-carrying reaction (loops included).
+    """
+    fva = flux_variability_analysis(
+        model, fraction_of_optimum=0.0,
+        loopless="cycleFreeFlux" if loopless else None,
+    )
+    excluded = set(exclude_reactions or ())
+    return [
+        rxn_id
+        for rxn_id, lo, hi in zip(fva.index, fva["minimum"], fva["maximum"], strict=True)
+        if rxn_id not in excluded and max(abs(lo), abs(hi)) > flux_tol
+    ]
+
+
+def random_sampling(
+    model: cobra.Model,
+    n_samples: int = 1000,
+    *,
+    n_objectives: int = 2,
+    good_reactions: Iterable[str] | None = None,
+    replace_max_bound: bool = False,
+    min_flux: bool = False,
+    loopless_good_reactions: bool = True,
+    exclude_reactions: Iterable[str] | None = None,
+    max_attempts: int = 100,
+    suppress_errors: bool = False,
+    seed: int | None = None,
+) -> RandomSamplingResult:
+    """Random-objective sampling of ``model``'s flux space (Bordel et al. 2010).
+
+    Each sample maximises ``sum(w_i * v_i)`` over ``n_objectives`` reactions drawn at
+    random from ``good_reactions``, with weights ``w_i = U(0,1) * (±1)`` (a random
+    sign per reaction, as in RAVEN). The resulting flux vector is one sample.
+
+    Parameters
+    ----------
+    n_samples
+        Number of flux vectors to return.
+    n_objectives
+        Reactions combined into each random objective (RAVEN's fixed 2).
+    good_reactions
+        Reaction ids eligible as objectives. If ``None`` they are computed once with
+        :func:`find_good_reactions` and returned for reuse.
+    replace_max_bound
+        RAVEN's ``replaceBoundsWithInf``: replace the largest upper bound with
+        ``+inf`` (and the smallest negative lower bound with ``-inf``) before
+        sampling, so a reaction whose biological maximum exceeds the model's
+        arbitrary cap is not pinned at it. **Off by default** — unlike RAVEN. It
+        applies only to the sampling phase (``good_reactions`` is always found on
+        the finite bounds), and it can open unbounded directions through loops
+        that show up as large fluxes in non-objective reactions; pair it with
+        ``min_flux`` if you enable it.
+    min_flux
+        After maximising the random objective, re-solve parsimoniously
+        (:func:`cobra.flux_analysis.pfba`) to minimise total flux at that optimum —
+        squeezes residual loops out of each individual sample.
+    loopless_good_reactions, exclude_reactions
+        Forwarded to :func:`find_good_reactions` when it is invoked (loopless loop
+        detection is on by default).
+    max_attempts, suppress_errors
+        A sample is retried if the random objective is degenerate (zero flux). After
+        ``max_attempts`` failures this raises, unless ``suppress_errors`` (then the
+        degenerate solution is kept with a warning).
+    seed
+        Seed for reproducible objective draws.
+
+    Returns
+    -------
+    RandomSamplingResult
+    """
+    if n_samples <= 0:
+        raise ValueError("n_samples must be positive.")
+    rng = np.random.default_rng(seed)
+    model = model.copy()
+
+    if model.slim_optimize(error_value=None) is None:
+        raise ValueError(
+            "The model has no feasible solution, likely due to incompatible constraints."
+        )
+
+    # good_reactions must be found on the finite bounds (FVA cannot handle inf),
+    # before any bound replacement.
+    if good_reactions is None:
+        good_reactions = find_good_reactions(
+            model, loopless=loopless_good_reactions,
+            exclude_reactions=exclude_reactions,
+        )
+    good_reactions = list(good_reactions)
+
+    if replace_max_bound:
+        max_ub = max(r.upper_bound for r in model.reactions)
+        min_lb = min(r.lower_bound for r in model.reactions)
+        for r in model.reactions:
+            if r.upper_bound == max_ub:
+                r.upper_bound = float("inf")
+            if min_lb < 0 and r.lower_bound == min_lb:
+                r.lower_bound = float("-inf")
+
+    if len(good_reactions) < n_objectives:
+        raise ValueError(
+            f"Only {len(good_reactions)} usable reactions found, need at least "
+            f"n_objectives={n_objectives}. Check the model's constraints."
+        )
+
+    good_rxn_objs = [model.reactions.get_by_id(r) for r in good_reactions]
+    reaction_ids = [r.id for r in model.reactions]
+    samples = np.zeros((n_samples, len(reaction_ids)))
+
+    for i in range(n_samples):
+        for attempt in range(1, max_attempts + 1):
+            chosen = rng.choice(len(good_rxn_objs), size=n_objectives, replace=False)
+            signs = rng.choice((-1.0, 1.0), size=n_objectives)
+            weights = rng.random(n_objectives) * signs
+            terms = [w * good_rxn_objs[j].flux_expression
+                     for j, w in zip(chosen, weights, strict=True)]
+            model.objective = model.problem.Objective(sum(terms), direction="max")
+            sol = model.optimize()
+            if sol.status == "optimal" and abs(sol.objective_value) > 1e-8:
+                samples[i, :] = (pfba(model) if min_flux else sol).fluxes.reindex(reaction_ids).to_numpy()
+                break
+            if attempt == max_attempts:
+                if not suppress_errors:
+                    raise RuntimeError(
+                        "Could not find a non-zero, loop-free solution after "
+                        f"{max_attempts} attempts for sample {i}. Review the model's "
+                        "constraints, or set suppress_errors=True."
+                    )
+                logger.warning("Sample %d: kept a degenerate solution after %d attempts.",
+                               i, max_attempts)
+                samples[i, :] = sol.fluxes.reindex(reaction_ids).to_numpy()
+
+    return RandomSamplingResult(
+        samples=pd.DataFrame(samples, columns=reaction_ids),
+        good_reactions=good_reactions,
+    )
diff --git a/src/raven_python/binaries.py b/src/raven_python/binaries.py
new file mode 100644
index 0000000..2d4b5a2
--- /dev/null
+++ b/src/raven_python/binaries.py
@@ -0,0 +1,148 @@
+"""Locate and provision external command-line binaries (BLAST+, DIAMOND, …).
+
+Shared across tools (not homology-specific). Resolution order for any executable:
+
+    explicit path arg  →  env var (RAVEN_PYTHON_<TOOL>)  →  shutil.which (PATH)
+      →  ensure_binary  (download the version-pinned ZIP from a raven_python release,
+                         verify SHA256, cache, return the path)
+      →  FileNotFoundError with install guidance
+
+So a pre-installed/conda binary always wins; the bundled ZIP is the zero-setup
+fallback. See docs/maintaining_binaries.md for how the release ZIPs and the
+registry are produced and updated.
+"""
+from __future__ import annotations
+
+import hashlib
+import os
+import platform
+import shutil
+import zipfile
+from pathlib import Path
+from urllib.request import urlopen
+
+# Registry of bundled binaries. Empty until release ZIPs are published; populated
+# per docs/maintaining_binaries.md. Keyed by *bundle*; one bundle can provide
+# several executables (e.g. "blast" -> blastp + makeblastdb). Entry shape:
+#   bundle -> {version, provides:[exe...], platforms:{"<os>-<arch>": {url, sha256}}}
+_REGISTRY: dict = {}  # while empty, ensure_binary() without a registry= raises FileNotFoundError
+
+# Executable name -> environment variable that overrides where it is looked up.
+_ENV_VARS = {
+    "diamond": "RAVEN_PYTHON_DIAMOND",
+    "blastp": "RAVEN_PYTHON_BLASTP",
+    "makeblastdb": "RAVEN_PYTHON_MAKEBLASTDB",
+    "hmmbuild": "RAVEN_PYTHON_HMMBUILD",
+    "hmmpress": "RAVEN_PYTHON_HMMPRESS",
+    "hmmsearch": "RAVEN_PYTHON_HMMSEARCH",
+    "hmmscan": "RAVEN_PYTHON_HMMSCAN",
+    "mafft": "RAVEN_PYTHON_MAFFT",
+    "cd-hit": "RAVEN_PYTHON_CDHIT",  # note: the hyphen is dropped in the variable name
+}
+
+
+def platform_key() -> str:
+    """Build the registry's ``<os>-<arch>`` platform key, e.g. ``macos-arm64``.
+
+    Known OS / CPU spellings are normalised; unknown ones pass through lower-cased.
+    """
+    os_name = platform.system().lower()
+    cpu = platform.machine().lower()
+    os_aliases = {"darwin": "macos"}
+    cpu_aliases = {"amd64": "x86_64", "aarch64": "arm64"}
+    return f"{os_aliases.get(os_name, os_name)}-{cpu_aliases.get(cpu, cpu)}"
+
+
+def _cache_dir() -> Path:
+    xdg = os.environ.get("XDG_CACHE_HOME")  # an empty value counts as unset
+    return (Path(xdg) if xdg else Path.home() / ".cache").joinpath("raven_python", "binaries")
+
+
+def _bundle_for(executable: str, registry: dict):
+    """Return ``(name, bundle)`` of the first bundle providing ``executable``, else ``(None, None)``."""
+    providers = ((key, spec) for key, spec in registry.items()
+                 if executable in spec.get("provides", []))
+    return next(providers, (None, None))
+
+
+def _sha256(path: Path) -> str:  # hex SHA-256 of the file, read in 1 MiB chunks
+    digest = hashlib.sha256()
+    with open(path, "rb") as stream:
+        while block := stream.read(1 << 20):
+            digest.update(block)
+    return digest.hexdigest()
+
+
+def ensure_binary(executable: str, *, registry: dict | None = None) -> Path:
+    """Download (if needed) and return the path to a bundled ``executable``.
+
+    Downloads the registry's pinned ZIP for the current platform, verifies its SHA256
+    (``ValueError`` on mismatch), extracts it into the cache and returns the executable
+    (``<executable>.exe`` is accepted on Windows). ``FileNotFoundError`` if not hosted.
+    """
+    registry = _REGISTRY if registry is None else registry
+    bundle_name, bundle = _bundle_for(executable, registry)
+    if bundle is None:
+        raise FileNotFoundError(
+            f"No bundled binary registered for {executable!r}. Install it (e.g. "
+            f"`conda install -c bioconda {executable}`) or pass an explicit path."
+        )
+    key = platform_key()
+    entry = bundle.get("platforms", {}).get(key)
+    if entry is None:
+        raise FileNotFoundError(
+            f"No bundled {executable!r} for platform {key!r}. Install it "
+            f"(e.g. `conda install -c bioconda {executable}`), set "
+            f"{_ENV_VARS.get(executable, 'the binary path')}, or pass binary=."
+        )
+
+    dest_dir = _cache_dir() / f"{bundle_name}-{bundle['version']}-{key}"
+    names = (executable, f"{executable}.exe") if key.startswith("windows-") else (executable,)
+    exe = next((dest_dir / n for n in names if (dest_dir / n).exists()), None)
+    if exe is not None:
+        return exe
+
+    dest_dir.mkdir(parents=True, exist_ok=True)
+    archive = dest_dir / "_download.zip"
+    # Fetch into a .part file; it becomes the archive only once its digest matches.
+    part = archive.with_suffix(archive.suffix + ".part")
+    try:
+        with urlopen(entry["url"]) as resp, open(part, "wb") as out:  # noqa: S310
+            shutil.copyfileobj(resp, out)
+        digest = _sha256(part)
+        if digest != entry["sha256"]:
+            raise ValueError(
+                f"SHA256 mismatch for {executable!r} ({key}): "
+                f"expected {entry['sha256']}, got {digest}."
+            )
+        os.replace(part, archive)
+    finally:
+        part.unlink(missing_ok=True)
+    with zipfile.ZipFile(archive) as zf:
+        zf.extractall(dest_dir)
+    archive.unlink(missing_ok=True)
+    exe = next((dest_dir / n for n in names if (dest_dir / n).exists()), None)
+    if exe is None:
+        raise FileNotFoundError(f"{executable!r} not found in the extracted bundle at {dest_dir}.")
+    exe.chmod(0o755)
+    return exe
+
+
+def resolve_binary(executable: str, *, binary: str | os.PathLike | None = None) -> str:
+    """Locate ``executable``: explicit ``binary`` → env var → PATH → bundled ZIP, else raise."""
+    if binary is not None:
+        return os.fspath(binary)
+    env_var = _ENV_VARS.get(executable)
+    override = os.environ.get(env_var) if env_var else None
+    located = override or shutil.which(executable)
+    if located:
+        return located
+    try:
+        bundled = ensure_binary(executable)
+    except FileNotFoundError as exc:
+        raise FileNotFoundError(
+            f"Could not find {executable!r}. Install it (e.g. "
+            f"`conda install -c bioconda {executable}`), put it on PATH, set "
+            f"{env_var or 'the binary path'}, or pass binary=. ({exc})"
+        ) from exc
+    return os.fspath(bundled)
diff --git a/src/raven_python/comparison/__init__.py b/src/raven_python/comparison/__init__.py
new file mode 100644
index 0000000..e4b4c19
--- /dev/null
+++ b/src/raven_python/comparison/__init__.py
@@ -0,0 +1,7 @@
+"""Structural and functional comparison across multiple models.
+
+See :func:`raven_python.comparison.compare.compare_models`.
+"""
+from raven_python.comparison.compare import ModelComparison, compare_models
+
+__all__ = ["ModelComparison", "compare_models"]
diff --git a/src/raven_python/comparison/compare.py b/src/raven_python/comparison/compare.py
new file mode 100644
index 0000000..c7d38a1
--- /dev/null
+++ b/src/raven_python/comparison/compare.py
@@ -0,0 +1,149 @@
+"""N-model structural and functional comparison.
+
+Compare two or more models — typically context-specific models extracted from the same
+template — on their reactions, metabolites, genes, subsystems, and (optionally) which
+metabolic tasks they perform. Returns tidy :class:`pandas.DataFrame`\\ s suitable for
+downstream plotting (heatmaps, tSNE/MDS, …) in seaborn / scikit-learn; plotting is
+intentionally not in this function so it stays usable inside pipelines.
+
+All matrices use the union of ids across the input models as the row index, so missing
+entries are unambiguously ``0`` / ``False`` rather than ``NaN``.
+"""
+from __future__ import annotations
+
+from collections.abc import Iterable
+from dataclasses import dataclass, field
+
+import cobra
+import pandas as pd
+
+from raven_python.tasks import Task, check_tasks
+
+
+@dataclass
+class ModelComparison:
+    """Tabular result of :func:`compare_models`.
+
+    ``reactions``/``metabolites``/``genes`` are 0/1 presence matrices and ``subsystems``
+    holds per-model reaction counts; each is indexed by id with one column per model.
+    ``similarity`` is the model × model Jaccard on the reaction set (1 = identical,
+    0 = disjoint).
+    """
+
+    model_ids: list[str]  # unique column labels, one per input model, in input order
+    reactions: pd.DataFrame  # int8 0/1, reaction id × model
+    metabolites: pd.DataFrame  # int8 0/1, metabolite id × model
+    genes: pd.DataFrame  # int8 0/1, gene id × model
+    subsystems: pd.DataFrame  # int32 reaction counts, subsystem name × model
+    similarity: pd.DataFrame  # float Jaccard index, model × model
+    tasks: pd.DataFrame | None = None  # bool, task id × model; filled iff tasks were supplied
+    failed_tasks: dict[str, list[str]] = field(default_factory=dict)  # model -> "task: error" lines
+
+
+def _presence_matrix(items_per_model: list[list[str]], model_ids: list[str]) -> pd.DataFrame:
+    """Return a 0/1 (int8) table: rows are the union of all items, columns the models.
+
+    Rows keep first-seen order across ``items_per_model``; a cell is 1 when that
+    model lists the item.
+    """
+    # dict.fromkeys de-duplicates while preserving first-seen order.
+    union = list(dict.fromkeys(it for per_model in items_per_model for it in per_model))
+    table = pd.DataFrame(0, index=union, columns=model_ids, dtype="int8")
+    for column, per_model in zip(model_ids, items_per_model, strict=True):
+        members = set(per_model)
+        if members:  # nothing to mark for a model without items
+            table.loc[list(members), column] = 1
+    return table
+
+
+def _subsystem_counts(model: cobra.Model) -> dict[str, int]:
+    """Count reactions per subsystem name; blank or missing subsystems count as '(none)'."""
+    tally: dict[str, int] = {}
+    for rxn in model.reactions:
+        raw = rxn.subsystem
+        # cobra keeps the subsystem as a string, but a list/tuple can slip in from messy
+        # YAML (RAVEN's cell-of-cells); only its first entry is used.
+        if isinstance(raw, (list, tuple)):
+            raw = raw[0] if raw else ""
+        label = (raw or "").strip() or "(none)"
+        tally[label] = tally.setdefault(label, 0) + 1
+    return tally
+
+
+def _jaccard_matrix(presence: pd.DataFrame) -> pd.DataFrame:
+    """Model × model Jaccard index of a 0/1 presence table (items in rows, models in columns)."""
+    labels = presence.columns
+    member = presence.values.astype(bool).T  # one boolean row per model
+    scores = pd.DataFrame(0.0, index=labels, columns=labels)
+    for row_label, row in zip(labels, member):
+        for col_label, col in zip(labels, member):
+            either = int((row | col).sum())
+            both = int((row & col).sum())
+            # Two empty sets are treated as identical.
+            scores.loc[row_label, col_label] = both / either if either else 1.0
+    return scores
+
+
+def compare_models(
+    models: Iterable[cobra.Model],
+    *,
+    tasks: str | Iterable[Task] | None = None,
+) -> ModelComparison:
+    """Tabulate how N (≥ 2) cobra models differ in reactions, metabolites, genes and
+    subsystems — and, when ``tasks`` is given, in the metabolic tasks they pass.
+
+    ``tasks`` is a task-list file path (parsed once) or already-parsed tasks; every
+    model is checked against that same list. Without it ``.tasks`` stays ``None``.
+
+    Columns are labelled by ``model.id``; a blank id becomes ``model_<i>`` and a
+    repeated label gets a ``__2``, ``__3``, … suffix.
+    """
+    models_list = list(models)
+    if len(models_list) < 2:
+        raise ValueError(f"compare_models needs ≥2 models; got {len(models_list)}")
+
+    # Column labels: unique and stable even when ids are blank or repeated.
+    model_ids: list[str] = []
+    for position, mdl in enumerate(models_list):
+        stem = (mdl.id or "").strip() or f"model_{position}"
+        label, suffix = stem, 2
+        while label in model_ids:
+            label = f"{stem}__{suffix}"
+            suffix += 1
+        model_ids.append(label)
+
+    reactions, metabolites, genes = (
+        _presence_matrix([[x.id for x in getattr(m, kind)] for m in models_list], model_ids)
+        for kind in ("reactions", "metabolites", "genes")
+    )
+
+    # Subsystems: rows are the sorted union of names, cells the per-model reaction counts.
+    per_model_counts = [_subsystem_counts(m) for m in models_list]
+    sub_ids = sorted(set().union(*per_model_counts))
+    subsystems = pd.DataFrame(0, index=sub_ids, columns=model_ids, dtype="int32")
+    for label, counts in zip(model_ids, per_model_counts, strict=True):
+        for name, count in counts.items():
+            subsystems.at[name, label] = count
+
+    similarity = _jaccard_matrix(reactions)
+
+    task_df: pd.DataFrame | None = None
+    failed: dict[str, list[str]] = {}
+    if tasks is not None:
+        # A path is parsed exactly once so that every model is tested against the same
+        # task list; the task ids become the row index.
+        from raven_python.tasks.tasklist import parse_task_list
+        is_path = isinstance(tasks, (str, bytes)) or hasattr(tasks, "__fspath__")
+        task_list = parse_task_list(tasks) if is_path else list(tasks)
+        task_ids = [t.id for t in task_list]
+        task_df = pd.DataFrame(False, index=task_ids, columns=model_ids, dtype=bool)
+        for label, mdl in zip(model_ids, models_list, strict=True):
+            for outcome in check_tasks(mdl, task_list):
+                task_df.at[outcome.id, label] = bool(outcome.passed)
+                if not outcome.passed and outcome.error:
+                    failed.setdefault(label, []).append(f"{outcome.id}: {outcome.error}")
+
+    return ModelComparison(
+        model_ids=model_ids, reactions=reactions, metabolites=metabolites, genes=genes,
+        subsystems=subsystems, similarity=similarity, tasks=task_df, failed_tasks=failed,
+    )
diff --git a/src/raven_python/data.py b/src/raven_python/data.py
new file mode 100644
index 0000000..b1264be
--- /dev/null
+++ b/src/raven_python/data.py
@@ -0,0 +1,135 @@
+"""Fetch and cache published data artefacts (KEGG reference model, tables, HMMs).
+
+The mirror of :mod:`raven_python.binaries` for *data*: a version-pinned registry of
+downloadable artefacts, fetched on first use, SHA256-verified, and cached under the
+XDG cache directory so end users never rebuild them from a KEGG dump (that is the
+maintainer's job — see docs/maintaining_kegg_data.md).
+
+Resolution for any artefact file:
+
+    explicit local dir  →  cached copy  →  download from the registry (verify,
+        cache)  →  FileNotFoundError with guidance
+
+The registry is **empty until the artefacts are published** (same as
+``binaries._REGISTRY``); until then ``ensure_data_file`` raises an actionable
+error. Cache layout::
+
+    $XDG_CACHE_HOME/raven_python/data/<dataset>-<version>/<filename>
+    (or ~/.cache/raven_python/data/... if XDG_CACHE_HOME is unset)
+"""
+from __future__ import annotations
+
+import os
+import shutil
+from pathlib import Path
+from urllib.request import urlopen
+
+from raven_python.binaries import _sha256
+
+# dataset -> {"version": str, "files": {filename: {"url": str, "sha256": str}}}
+# Populated when raven_python publishes the KEGG artefacts as release assets.
+_DATA_REGISTRY: dict = {}  # the registry used whenever a caller passes registry=None
+
+# The core KEGG artefacts needed to build a model (no HMM libraries).
+CORE_KEGG_FILES = (  # default ``files`` of ensure_kegg_data()
+    "reference_model.yml.gz",
+    "ko_reaction.tsv.gz",
+    "ko_names.tsv.gz",
+    "organism_gene_ko.tsv.xz",  # note the .xz suffix; the other entries are .gz
+    "rxn_flags.tsv.gz",
+)
+
+
+def _data_cache_dir() -> Path:
+    xdg = os.environ.get("XDG_CACHE_HOME")  # an empty value counts as unset
+    return (Path(xdg) if xdg else Path.home() / ".cache").joinpath("raven_python", "data")
+
+
+def _bundle(dataset: str, registry: dict) -> dict:
+    """Return ``registry[dataset]``; an unregistered dataset raises ``FileNotFoundError``."""
+    if (entry := registry.get(dataset)) is not None:
+        return entry
+    raise FileNotFoundError(
+        f"No data artefacts registered for {dataset!r}. Either pass a local "
+        f"directory of artefacts, or build them per docs/maintaining_kegg_data.md."
+    )
+
+
+def ensure_data_file(
+    dataset: str,
+    filename: str,
+    *,
+    version: str | None = None,
+    registry: dict | None = None,
+) -> Path:
+    """Download (if needed) and return the cached path to one artefact file.
+
+    Looks the file up in the registry for ``dataset``, re-uses the copy cached under
+    ``<dataset>-<version>`` (default: the registry's version) or downloads and
+    SHA256-verifies it. ``FileNotFoundError`` if unregistered; ``ValueError`` on a bad digest.
+    """
+    registry = _DATA_REGISTRY if registry is None else registry
+    bundle = _bundle(dataset, registry)
+    ver = version or bundle["version"]
+    entry = bundle.get("files", {}).get(filename)
+    if entry is None:
+        raise FileNotFoundError(
+            f"{filename!r} is not registered for {dataset!r} {ver}. "
+            f"Available: {sorted(bundle.get('files', {}))}."
+        )
+
+    dest_dir = _data_cache_dir() / f"{dataset}-{ver}"
+    dest = dest_dir / filename
+    if dest.exists():
+        return dest
+
+    dest_dir.mkdir(parents=True, exist_ok=True)
+    tmp = dest.with_name(dest.name + ".part")
+    try:
+        with urlopen(entry["url"]) as resp, open(tmp, "wb") as out:  # noqa: S310 (trusted registry URLs)
+            shutil.copyfileobj(resp, out)
+        digest = _sha256(tmp)
+        if digest != entry["sha256"]:
+            raise ValueError(f"SHA256 mismatch for {dataset}/{filename} ({ver}): "
+                             f"expected {entry['sha256']}, got {digest}.")
+        tmp.replace(dest)
+    finally:
+        tmp.unlink(missing_ok=True)  # no-op after the rename; else drops the partial download
+    return dest
+
+
+def ensure_kegg_data(
+    *,
+    version: str | None = None,
+    files: tuple[str, ...] = CORE_KEGG_FILES,
+    registry: dict | None = None,
+) -> Path:
+    """Cache the requested ``kegg`` artefacts and return the directory holding them.
+
+    Each of ``files`` (default :data:`CORE_KEGG_FILES`) is fetched; the result is the
+    ``artefact_dir`` argument of :func:`get_kegg_model_for_organism_from_artefacts`.
+    """
+    if registry is None:
+        registry = _DATA_REGISTRY
+    ver = version if version else _bundle("kegg", registry)["version"]
+    for artefact in files:
+        ensure_data_file("kegg", artefact, version=ver, registry=registry)
+    return _data_cache_dir().joinpath(f"kegg-{ver}")
+
+
+def ensure_kegg_hmm_library(
+    domain: str, *, version: str | None = None, registry: dict | None = None
+) -> Path:
+    """Cache ``<domain>.hmm`` plus its ``hmmpress`` index files; return the ``.hmm`` path.
+
+    ``domain`` is ``"prokaryotes"`` or ``"eukaryotes"``. The library is fetched first,
+    then the ``.h3f/.h3i/.h3m/.h3p`` sidecars; the returned path is the argument for
+    :func:`run_hmmscan`.
+    """
+    if registry is None:
+        registry = _DATA_REGISTRY
+    ver = version if version else _bundle("kegg", registry)["version"]
+    hmm_name = f"{domain}.hmm"
+    wanted = [hmm_name, *(hmm_name + ext for ext in (".h3f", ".h3i", ".h3m", ".h3p"))]
+    paths = [ensure_data_file("kegg", name, version=ver, registry=registry) for name in wanted]
+    return paths[0]
diff --git a/src/raven_python/gapfilling/__init__.py b/src/raven_python/gapfilling/__init__.py
new file mode 100644
index 0000000..747b293
--- /dev/null
+++ b/src/raven_python/gapfilling/__init__.py
@@ -0,0 +1,9 @@
+"""Connectivity gap-filling against template models.
+
+:func:`connect_blocked_reactions` adds the fewest (lowest-penalty) template reactions so
+reactions blocked in a draft can carry flux. For the other gap-fill flavour (fill until
+the objective is feasible) use ``cobra.flux_analysis.gapfill``.
+"""
+from raven_python.gapfilling.fill import GapFillResult, connect_blocked_reactions
+
+__all__ = ["GapFillResult", "connect_blocked_reactions"]
diff --git a/src/raven_python/gapfilling/fill.py b/src/raven_python/gapfilling/fill.py
new file mode 100644
index 0000000..ba3418d
--- /dev/null
+++ b/src/raven_python/gapfilling/fill.py
@@ -0,0 +1,172 @@
+"""Connectivity gap-filling: add the fewest template reactions so reactions that are
+*blocked* in a draft can carry flux.
+
+For the other gap-filling flavour (add the fewest template reactions until the model's
+own objective becomes feasible) use ``cobra.flux_analysis.gapfill`` — just align the
+template's metabolite ids to the draft first, since cobra matches by id.
+
+This module solves an MILP: pick the minimum-penalty subset of template reactions such that the
+blocked (irreversible) draft reactions can carry flux at steady state. Template
+metabolites are matched to the draft by ``name[compartment]`` (via
+:func:`add_reactions_from_model`), so templates in a different identifier namespace
+than the model still work. Per-reaction ``scores`` (higher = prefer to include) map to
+RAVEN's ``rxnScores``; the MILP minimises the penalty ``-score`` (default penalty
+``1.0``, i.e. minimise the number of reactions added).
+"""
+from __future__ import annotations
+
+from collections.abc import Iterable
+from dataclasses import dataclass
+
+import cobra
+from cobra.flux_analysis import find_blocked_reactions, flux_variability_analysis
+
+from raven_python.manipulation.transfer import add_reactions_from_model
+
+
+@dataclass
+class GapFillResult:
+    """Outcome of a connectivity gap-fill.
+
+    ``added_reactions`` are the template reaction ids added to ``model``;
+    ``newly_connected`` are draft reactions that were blocked but can now carry flux;
+    ``cannot_connect`` are the other blocked reactions (negative-lower-bound ones are never targeted).
+    """
+
+    added_reactions: list[str]  # sorted ids chosen by the MILP
+    newly_connected: list[str]  # sorted ids of the blocked reactions forced to carry flux
+    cannot_connect: list[str]  # sorted ids: blocked in the draft minus ``newly_connected``
+    model: cobra.Model  # a copy of the draft model with ``added_reactions`` added
+
+
+def _as_models(templates: cobra.Model | Iterable[cobra.Model]) -> list[cobra.Model]:
+    return list(templates) if not isinstance(templates, cobra.Model) else [templates]
+
+
+def _merge_templates(model: cobra.Model, templates: list[cobra.Model]) -> tuple[cobra.Model, list[str]]:
+    """Return a copy of ``model`` extended with every template reaction it lacks.
+
+    Also returns the added ids (the gap-fill candidates); metabolite matching is left to ``add_reactions_from_model``.
+    """
+    merged = model.copy()
+    candidate_ids: list[str] = []
+    for source in templates:
+        missing = [rxn.id for rxn in source.reactions if rxn.id not in merged.reactions]
+        if not missing:
+            continue
+        copied = add_reactions_from_model(merged, source, missing, genes=False, note=None)
+        candidate_ids.extend(rxn.id for rxn in copied)
+    return merged, candidate_ids
+
+
+def _solve_min_templates(
+    working: cobra.Model,
+    template_ids: list[str],
+    *,
+    scores: dict[str, float] | None,
+    penalty: float,
+    allow_net_production: bool,
+) -> set[str] | None:
+    """MILP: minimum-penalty template reactions making ``working`` feasible.
+
+    The requirement (here, forced flux through the blocked reactions) must already be
+    imposed on ``working``, which is modified in place (indicators, gates, objective).
+    Returns the template reaction ids to keep, or ``None`` if the solve is not optimal.
+    """
+    prob = working.problem
+    indicators: dict[str, object] = {}
+    extra = []
+    for rid in template_ids:
+        rxn = working.reactions.get_by_id(rid)
+        y = prob.Variable(f"_gf_keep_{rid}", type="binary")
+        indicators[rid] = y
+        # Flux is confined to [lb*y, ub*y]: zero unless the reaction is kept (y=1).
+        extra.append(prob.Constraint(rxn.flux_expression - rxn.upper_bound * y, ub=0, name=f"_gf_ub_{rid}"))
+        extra.append(prob.Constraint(rxn.flux_expression - rxn.lower_bound * y, lb=0, name=f"_gf_lb_{rid}"))
+    working.add_cons_vars(list(indicators.values()) + extra)  # indicators first, then their gates
+
+    if allow_net_production:  # relax steady state to Sv >= 0 (mets may accumulate)
+        for met in working.metabolites:
+            working.constraints[met.id].ub = None
+
+    def pen(rid: str) -> float:  # cost of keeping rid: -score when scored, else the flat penalty
+        return -scores[rid] if scores and rid in scores else penalty
+
+    working.objective = prob.Objective(
+        sum(pen(rid) * indicators[rid] for rid in template_ids), direction="min"
+    )
+    working.slim_optimize()  # the outcome is read from solver.status / y.primal below
+    if working.solver.status != "optimal":
+        return None
+    return {rid for rid, y in indicators.items() if (y.primal or 0) > 0.5}  # falsy primal counts as 0
+
+
+def _build_filled(model: cobra.Model, templates: list[cobra.Model], chosen: set[str]) -> cobra.Model:
+    """Copy ``model`` and add each ``chosen`` id from the first template that contains it."""
+    filled, remaining = model.copy(), set(chosen)
+    for template in templates:
+        ids = sorted(r for r in remaining if r in template.reactions)  # set order varies per run
+        if ids:
+            add_reactions_from_model(filled, template, ids, genes=False, note="Added by connect_blocked_reactions")
+            remaining -= set(ids)
+    return filled
+
+
+def connect_blocked_reactions(
+    model: cobra.Model,
+    templates: cobra.Model | Iterable[cobra.Model],
+    *,
+    scores: dict[str, float] | None = None,
+    penalty: float = 1.0,
+    allow_net_production: bool = False,
+    eps: float = 1.0,
+) -> GapFillResult:
+    """Unblock draft reactions by adding the cheapest set of template reactions.
+
+    Candidates are the reactions blocked in ``model`` whose lower bound is ≥ 0 (like
+    RAVEN, reversible ones are skipped: they carry flux trivially in the split
+    formulation). Those that can exceed ``eps`` once every template is merged in are
+    forced to carry ``eps``, and an MILP keeps the minimum-penalty template reactions
+    (``-scores[id]`` when scored, else ``penalty``) that make this feasible. With
+    ``allow_net_production`` metabolites may accumulate. The filled model is a copy.
+
+    For the *other* gap-filling flavour — adding reactions to make the model's
+    objective feasible — use ``cobra.flux_analysis.gapfill`` after aligning the
+    template's metabolite ids to the draft.
+
+    The draft is expected to have exchange reactions for its nutrients (otherwise most
+    reactions are trivially blocked). Raises ``RuntimeError`` if the MILP is infeasible.
+    """
+    templates = _as_models(templates)
+    blocked = set(find_blocked_reactions(model))
+    # Only blocked reactions that cannot run in reverse are worth forcing.
+    candidates = [rid for rid in blocked if model.reactions.get_by_id(rid).lower_bound >= 0]
+
+    working, template_ids = _merge_templates(model, templates)
+    if candidates:
+        fva = flux_variability_analysis(working, reaction_list=candidates, fraction_of_optimum=0.0)
+        # A candidate absent from the FVA frame (e.g. eliminated upstream) counts as
+        # unreachable rather than raising a KeyError.
+        reachable = [rid for rid in candidates if rid in fva.index]
+        target = [rid for rid in reachable if fva.at[rid, "maximum"] > eps]
+    else:
+        target = []
+
+    # Every blocked reaction that is not targeted is reported as unconnectable.
+    cannot = sorted(blocked.difference(target))
+    if not target:
+        return GapFillResult([], [], cannot, model.copy())
+
+    # Demand at least ``eps`` through each target, then let the MILP pick templates.
+    for rid in target:
+        working.reactions.get_by_id(rid).lower_bound = eps
+    chosen = _solve_min_templates(
+        working, template_ids, scores=scores, penalty=penalty,
+        allow_net_production=allow_net_production,
+    )
+    if chosen is not None:
+        return GapFillResult(sorted(chosen), sorted(target), cannot, _build_filled(model, templates, chosen))
+    raise RuntimeError(
+        "Gap-filling is infeasible: the blocked reactions cannot all carry flux "
+        "even with every template reaction added."
+    )
diff --git a/src/raven_python/init/__init__.py b/src/raven_python/init/__init__.py
new file mode 100644
index 0000000..040f299
--- /dev/null
+++ b/src/raven_python/init/__init__.py
@@ -0,0 +1,46 @@
+"""Context-specific model extraction (tINIT / ftINIT).
+
+tINIT:
+* :func:`run_init` — the classic INIT MILP.
+* :func:`score_reactions_from_genes` / :func:`gene_scores_from_expression` —
+  gene → reaction scoring (RNA-seq is the common upstream).
+* :func:`get_init_model` — the tINIT pipeline (dead-end removal + ``run_init``).
+
+ftINIT (faster, staged):
+* :func:`run_ftinit` — the single-step ftINIT MILP (continuous indicators for
+  positive-score reactions; binaries only on negatives — the speedup over ``run_init``).
+* :func:`ftinit` — the full pipeline (``prep_init_model`` → staged ``run_ftinit`` →
+  ``fill_tasks`` → ``remove_low_score_genes``).
+"""
+from raven_python.init.build import InitModelResult, get_init_model
+from raven_python.init.ftinit import FtInitResult, ftinit, run_ftinit
+from raven_python.init.genes import remove_low_score_genes
+from raven_python.init.init import InitResult, run_init
+from raven_python.init.merge import group_rxn_scores, merge_linear
+from raven_python.init.prep import PrepData, ReactionMasks, classify_reactions, prep_init_model
+from raven_python.init.score import gene_scores_from_expression, score_reactions_from_genes
+from raven_python.init.steps import InitStep, get_init_steps
+from raven_python.init.taskfill import TaskFillResult, fill_tasks
+
+__all__ = [
+    "FtInitResult",
+    "InitModelResult",
+    "InitResult",
+    "InitStep",
+    "PrepData",
+    "ReactionMasks",
+    "TaskFillResult",
+    "classify_reactions",
+    "fill_tasks",
+    "ftinit",
+    "gene_scores_from_expression",
+    "get_init_model",
+    "get_init_steps",
+    "group_rxn_scores",
+    "merge_linear",
+    "prep_init_model",
+    "remove_low_score_genes",
+    "run_ftinit",
+    "run_init",
+    "score_reactions_from_genes",
+]
diff --git a/src/raven_python/init/build.py b/src/raven_python/init/build.py
new file mode 100644
index 0000000..a0d0538
--- /dev/null
+++ b/src/raven_python/init/build.py
@@ -0,0 +1,113 @@
+"""tINIT model building — high-level pipeline.
+
+Turn expression-derived scores into reaction scores (via the GPR), drop reactions that
+cannot carry flux, then run the INIT MILP to extract a context-specific model. Pass
+gene scores (typically from :func:`gene_scores_from_expression` or one of the omics
+loaders) or reaction scores directly. ``essential_rxns`` are forced kept.
+
+For task-aware gap-filling on top of the resulting model, use ftINIT
+(:func:`raven_python.init.ftinit`); ``get_init_model`` itself does not run the task layer.
+"""
+from __future__ import annotations
+
+from collections.abc import Iterable, Mapping
+from dataclasses import dataclass
+
+import cobra
+from cobra.flux_analysis import find_blocked_reactions
+
+from raven_python.init.init import run_init
+from raven_python.init.score import score_reactions_from_genes
+
+
+@dataclass
+class InitModelResult:
+    """Result of :func:`get_init_model`."""
+
+    model: cobra.Model  # the model returned by ``run_init``
+    reaction_scores: dict[str, float]  # scores passed to ``run_init`` (given, or derived from genes)
+    deleted_dead_end_reactions: list[str]  # sorted ids dropped by the blocked-reaction pre-filter
+    deleted_in_init: list[str]  # ``deleted_reactions`` reported by ``run_init``
+    met_production: dict[str, bool]  # ``met_production`` reported by ``run_init``
+    objective: float  # ``objective`` reported by ``run_init``
+
+
+def get_init_model(
+    ref_model: cobra.Model,
+    *,
+    rxn_scores: Mapping[str, float] | None = None,
+    gene_scores: Mapping[str, float] | None = None,
+    isozyme_scoring: str = "max",
+    complex_scoring: str = "min",
+    no_gene_score: float = -2.0,
+    essential_rxns: Iterable[str] | None = None,
+    present_mets: Iterable[str] | None = None,
+    prod_weight: float = 0.5,
+    allow_excretion: bool = True,
+    no_rev_loops: bool = False,
+    remove_dead_ends: bool = True,
+    eps: float = 1.0,
+    big_m: float | None = None,
+    mip_gap: float | None = None,
+    time_limit: float | None = None,
+) -> InitModelResult:
+    """Extract a context-specific model from ``ref_model`` with tINIT.
+
+    Exactly one of ``rxn_scores`` (reaction id → score) and ``gene_scores`` (gene id →
+    score, turned into reaction scores through the GPR by
+    :func:`score_reactions_from_genes`) must be given, else ``ValueError``. Unless
+    ``remove_dead_ends=False``, reactions unable to carry flux are dropped first —
+    never the ``essential_rxns`` — and what remains goes to :func:`run_init`.
+    """
+    if sum(arg is not None for arg in (rxn_scores, gene_scores)) != 1:
+        raise ValueError("Provide exactly one of rxn_scores or gene_scores.")
+
+    model = ref_model.copy()
+    essential = set(essential_rxns) if essential_rxns else set()
+    if gene_scores is None:
+        scores = dict(rxn_scores)
+    else:
+        scores = score_reactions_from_genes(
+            model, gene_scores, isozyme_scoring=isozyme_scoring,
+            complex_scoring=complex_scoring, no_gene_score=no_gene_score,
+        )
+
+    deleted_dead_end: list[str] = []
+    if remove_dead_ends:
+        # Pre-filter under the *most permissive* boundary regime: with
+        # ``allow_excretion`` every metabolite lacking a boundary reaction gets a
+        # demand, and blocked reactions are searched with exchanges opened. Only
+        # reactions blocked even then are removed, so the stricter run_init problem
+        # never loses a candidate it could have used.
+        probe = model.copy()
+        original_ids = {rxn.id for rxn in model.reactions}
+        if allow_excretion:
+            has_boundary = {met.id for rxn in probe.boundary for met in rxn.metabolites}
+            lacking = [met for met in probe.metabolites if met.id not in has_boundary]
+            for met in lacking:
+                probe.add_boundary(met, type="demand")
+        blocked = find_blocked_reactions(probe, open_exchanges=True)
+        deleted_dead_end = sorted((set(blocked) & original_ids) - essential)
+        model.remove_reactions(deleted_dead_end, remove_orphans=True)
+
+    kept_essential = essential & {rxn.id for rxn in model.reactions}
+    result = run_init(
+        model, scores,
+        present_mets=present_mets,
+        essential_rxns=kept_essential,
+        prod_weight=prod_weight,
+        allow_excretion=allow_excretion,
+        no_rev_loops=no_rev_loops,
+        eps=eps,
+        big_m=big_m,
+        mip_gap=mip_gap,
+        time_limit=time_limit,
+    )
+    return InitModelResult(
+        model=result.model,
+        reaction_scores=scores,
+        deleted_dead_end_reactions=deleted_dead_end,
+        deleted_in_init=result.deleted_reactions,
+        met_production=result.met_production,
+        objective=result.objective,
+    )
diff --git a/src/raven_python/init/ftinit.py b/src/raven_python/init/ftinit.py
new file mode 100644
index 0000000..b355e45
--- /dev/null
+++ b/src/raven_python/init/ftinit.py
@@ -0,0 +1,328 @@
+"""The ftINIT MILP — the faster staged variant of INIT.
+
+ftINIT keeps tINIT's objective — pick the reaction subset best matching expression
+scores while staying flux-consistent — but with a cheaper MILP encoding that is the
+reason it is *fast*: a **positive-score reaction needs no binary indicator**. Because the
+objective *maximises* ``Σ score·y`` with ``score > 0``, the optimiser pushes its
+continuous indicator ``y ∈ [0,1]`` to 1, and the gate ``net_flux ≥ force_on·y`` only
+lets ``y`` reach 1 if the reaction can actually carry flux. Only *negative*-score
+reactions need a true ``{0,1}`` binary (their indicator would otherwise sit at 0 for
+free). This roughly halves the integer count — the dominant MILP cost.
+
+Reaction categories (RAVEN's six), by score sign × reversibility:
+
+* **score 0** — left in the model, *not* in the problem: a free flux variable that can
+  carry flux for connectivity but is neither scored nor removable.
+* **positive, irreversible** — continuous ``y∈[0,1]``; ``v ≥ force_on·y``. No binary.
+* **positive, reversible** — split ``v = v⁺ − v⁻``; continuous ``y``; a single
+  direction binary keeps one of ``v⁺/v⁻`` at 0 (no fwd/back loop faking "on");
+  ``v⁺+v⁻ ≥ force_on·y``.
+* **negative, irreversible** — binary ``x∈{0,1}``; ``v ≤ ub·x``.
+* **negative, reversible** — split; binary ``x``; ``v⁺+v⁻ ≤ cap·x``.
+* **essential** — forced on (``v ≥ force_on_ess``); no indicator. Assumed already
+  oriented irreversible in its forced direction (``prepINITModel`` does this).
+
+Objective: **maximise** ``Σ score·indicator``. Unlike classic INIT
+(:func:`raven_python.init.run_init`), ftINIT does **not** reward production of every
+metabolite — ``prod_weight`` applies only to metabolomics-detected metabolites (not
+yet implemented; passing a non-empty ``metabolomics`` argument raises
+``NotImplementedError``). Connectivity comes solely from the flux gates plus any
+essential reactions. ``allow_excretion`` relaxes ``S·v = 0`` to ``≥ 0``; ``rem_pos_rev``
+drops positive reversible reactions from the problem (used in the staging schedule).
+
+Needs a MILP solver (cobra's configured optlang solver; only Gurobi is fully viable at
+genome scale — see ``docs/init_solver_benchmark.md``). Magic numbers
+(``force_on``/``force_on_ess`` = 0.1, ``big_m`` = 100) are exposed and scale-dependent;
+calibration tables are in ``docs/init_param_calibration.md``. ``big_m`` caps a *scored*
+reaction's flux in its on/off (direction) constraint — using a fixed 100 rather than
+the reaction's ±1000 bound keeps the LP relaxation tight (what makes the genome-scale
+MILP tractable). Free / essential reactions keep their real bounds.
+
+⚠️ **Loops.** The MILP has *no* loopless constraint: an internal
+thermodynamically-infeasible cycle is flux-consistent (``S·v = 0``), so if its
+reactions carry positive net score the optimiser will "include" them with no real
+exchange flux. RAVEN tolerates this — loop-free models come from the staged pipeline
++ exchange handling, and at genome scale real exchange reactions make such cycles not
+score-optimal. A loopless option could be layered on later if needed.
+"""
+from __future__ import annotations
+
+from collections.abc import Iterable, Mapping
+from dataclasses import dataclass, field
+
+import cobra
+from optlang.symbolics import Real, add, mul
+
+from raven_python.init.genes import remove_low_score_genes
+from raven_python.init.merge import group_rxn_scores
+from raven_python.init.steps import get_init_steps
+from raven_python.init.taskfill import fill_tasks
+
+_FORCE_ON = 0.1  # min flux for a reaction to count as "on" (RAVEN forceOnLim)
+_BIG_M = 100.0   # indicator/direction big-M cap on a *scored* reaction's flux (RAVEN's 100)
+
+
+@dataclass
+class FtInitResult:
+    """Result of :func:`run_ftinit`."""
+
+    model: cobra.Model  # copy of the input model with ``deleted_reactions`` removed
+    kept_reactions: list[str]  # sorted ids: free/essential reactions plus ``on_reactions``
+    deleted_reactions: list[str]  # sorted ids of every input reaction that was not kept
+    fluxes: dict[str, float]  # reaction id -> net flux read back from the solved problem
+    objective: float  # objective value of the solved MILP
+    on_reactions: set[str] = field(default_factory=set)  # scored reactions turned on (indicator)
+
+
+def run_ftinit(
+    model: cobra.Model,
+    rxn_scores: Mapping[str, float] | None = None,
+    *,
+    essential_rxns: Iterable[str] | None = None,
+    essential_directions: Mapping[str, int] | None = None,
+    essential_force: Mapping[str, float] | None = None,
+    allow_excretion: bool = False,
+    rem_pos_rev: bool = False,
+    ignore_mets: Iterable[str] = (),
+    force_on: float = _FORCE_ON,
+    force_on_ess: float = _FORCE_ON,
+    big_m: float = _BIG_M,
+    mip_gap: float | None = None,
+    time_limit: float | None = None,
+) -> FtInitResult:
+    """Run the single-step ftINIT MILP and return the extracted model.
+
+    ``rxn_scores`` maps reaction id → score (default 0 → reaction left free in the
+    model, not scored or removable). ``essential_rxns`` are forced to carry flux
+    (≥ ``force_on_ess``); ``essential_directions`` maps an essential reaction id to
+    ``+1`` (forward) or ``-1`` (reverse) for the forced direction (default forward).
+    ``essential_force`` optionally overrides ``force_on_ess`` per essential reaction;
+    the forced flux is always clamped to the reaction's capacity in that direction.
+    ``ignore_mets`` are metabolite **names** whose mass balance is dropped (RAVEN's
+    per-step "simple metabolite" removal, e.g. H2O/H+). See the module docstring for
+    the formulation. This is the single-step variant; the staged schedule
+    (:func:`raven_python.init.ftinit`) calls it per step.
+
+    Remaining options:
+
+    * ``rem_pos_rev`` — positive-score reactions with ``lb < 0 < ub`` are treated as
+      score 0 (left free, always kept).
+    * ``allow_excretion`` — each mass balance is ``≥ 0`` instead of ``= 0``.
+    * ``force_on`` — flux magnitude a positive-score reaction must carry per unit of
+      its indicator (``|flux| ≥ force_on·y``).
+    * ``big_m`` — cap used in the direction constraints of positive reversible
+      reactions and in the on/off constraint of negative-score reactions.
+    * ``time_limit`` — converted with ``int()`` and set as the solver timeout.
+    * ``mip_gap`` — set through the Gurobi parameter interface on a best-effort
+      basis; any failure to set it is silently ignored.
+
+    Returns an :class:`FtInitResult`. A scored reaction counts as "on" when its
+    indicator value is ≥ 0.5; every reaction that is neither on, free (score 0) nor
+    essential is removed from a *copy* of ``model`` (``model`` itself is only read).
+
+    Raises ``RuntimeError`` if the solver status is not one of ``optimal``,
+    ``feasible``, ``suboptimal`` or ``time_limit``.
+    """
+    scores = dict(rxn_scores or {})
+    essential = set(essential_rxns or [])
+    directions = dict(essential_directions or {})
+    essential_force = dict(essential_force or {})
+    ignore_met_names = set(ignore_mets)
+    prob = model.problem
+    opt = prob.Model()  # a fresh problem: the MILP is built from scratch, not on model.solver
+
+    variables: list = []
+    constraints: list = []
+    flux_terms: dict[str, list[tuple[object, float]]] = {}  # rxn id -> [(var, sign)]
+    indicators: dict[str, tuple[object, float]] = {}  # rxn id -> (indicator var, score)
+    free_or_essential: set[str] = set()               # kept regardless of an indicator
+
+    def add_constraint(expr, **kw):
+        # Collect only; everything is added to ``opt`` in one call further down.
+        constraints.append(prob.Constraint(expr, **kw))
+
+    for rxn in model.reactions:
+        rid = rxn.id
+        lb, ub = rxn.lower_bound, rxn.upper_bound
+        score = float(scores.get(rid, 0.0))
+        if rem_pos_rev and score > 0 and lb < 0 < ub:
+            score = 0.0  # staging step 1: positive reversibles dropped from the problem
+
+        if rid in essential:
+            # Forced to carry flux in its forced direction (default forward); respect a
+            # stricter native bound if the model already forces more flux. The forced
+            # magnitude may be set per reaction (RAVEN's min(0.99·|prev flux|, 0.1), so
+            # a reaction is never forced above what it carried before).
+            force = essential_force.get(rid, force_on_ess) if essential_force else force_on_ess
+            if directions.get(rid, 1) >= 0:
+                forced = min(force, ub)  # clamp to capacity so we never make lb > ub
+                # NOTE(review): if ub < 0 here (reverse-only reaction forced forward) the
+                # lower bound becomes 0 > ub; the module docstring assumes pre-oriented input.
+                v = prob.Variable(f"v_{rid}", lb=max(forced, lb, 0.0), ub=ub)
+            else:  # reverse: flux ≤ -force
+                forced = min(force, -lb)
+                v = prob.Variable(f"v_{rid}", lb=lb, ub=min(-forced, ub))
+            variables.append(v)
+            flux_terms[rid] = [(v, 1.0)]
+            free_or_essential.add(rid)
+            continue  # essential reactions are never scored, whatever their score
+
+        if score == 0.0:  # free: carries flux for connectivity, not scored/removable
+            v = prob.Variable(f"v_{rid}", lb=lb, ub=ub)
+            variables.append(v)
+            flux_terms[rid] = [(v, 1.0)]
+            free_or_essential.add(rid)
+            continue
+
+        reversible = lb < 0 < ub
+        if reversible:
+            vp = prob.Variable(f"vp_{rid}", lb=0.0, ub=ub)
+            vn = prob.Variable(f"vn_{rid}", lb=0.0, ub=-lb)
+            variables += [vp, vn]
+            flux_terms[rid] = [(vp, 1.0), (vn, -1.0)]
+            total = vp + vn  # |flux| (one of vp/vn pinned to 0 below), used by the gates
+        else:  # single-direction: keep the model's own [lb, ub] (incl. any forced lb>0)
+            v = prob.Variable(f"v_{rid}", lb=lb, ub=ub)
+            variables.append(v)
+            flux_terms[rid] = [(v, 1.0)]
+            total = v if ub > 0 else -v  # magnitude for a single-direction reaction
+
+        if score > 0:
+            y = prob.Variable(f"y_{rid}", lb=0.0, ub=1.0)  # continuous indicator, no binary
+            variables.append(y)
+            indicators[rid] = (y, score)
+            add_constraint(total - force_on * y, lb=0.0, name=f"on_{rid}")  # y=1 ⇒ |flux| ≥ force_on
+            if reversible:  # one direction binary stops a fwd/back loop faking "on"
+                b = prob.Variable(f"b_{rid}", type="binary")
+                variables.append(b)
+                add_constraint(vp - big_m * b, ub=0.0, name=f"dirp_{rid}")          # vp ≤ M·b
+                add_constraint(vn + big_m * b, ub=big_m, name=f"dirn_{rid}")        # vn ≤ M·(1-b)
+        else:  # score < 0
+            x = prob.Variable(f"x_{rid}", type="binary")
+            variables.append(x)
+            indicators[rid] = (x, score)
+            add_constraint(total - big_m * x, ub=0.0, name=f"off_{rid}")  # flux>0 ⇒ x=1
+
+    # Steady state S·v {== 0 | >= 0}; ignored metabolites are left unbalanced.
+    # Build each metabolite's balance as a *flat* list of (coeff·sign)·var terms and sum
+    # it with optlang.symbolics.add. Python's builtin sum re-canonicalises a growing
+    # sympy expression at every step (O(n²)); for hub metabolites that appear in ~10³
+    # reactions that is minutes per constraint. add() builds the sum in one pass.
+    met_terms: dict = {m: [] for m in model.metabolites if m.name not in ignore_met_names}
+    for rxn in model.reactions:
+        terms = flux_terms[rxn.id]
+        for met, coeff in rxn.metabolites.items():
+            bucket = met_terms.get(met)
+            if bucket is None:  # metabolite is in ``ignore_mets``: no balance row
+                continue
+            for var, sign in terms:
+                bucket.append(mul([Real(coeff * sign), var]))
+    for termlist in met_terms.values():
+        if termlist:  # a metabolite used by no reaction gets no constraint
+            add_constraint(add(termlist), lb=0.0, ub=None if allow_excretion else 0.0)
+
+    opt.add(variables + constraints)
+    opt.objective = prob.Objective(
+        add([mul([Real(score), ind]) for ind, score in indicators.values()]), direction="max"
+    )
+    if time_limit is not None:
+        opt.configuration.timeout = int(time_limit)  # fractional part is dropped
+    if mip_gap is not None:
+        try:  # Gurobi-specific; harmless if the backend differs
+            opt.problem.Params.MIPGap = mip_gap
+        except Exception:  # noqa: BLE001
+            pass
+    opt.optimize()
+    # Accept a near-optimal incumbent (when a MIP gap / time limit is set), as RAVEN does.
+    # NOTE(review): ``time_limit`` is accepted without checking that an incumbent exists;
+    # confirm the backend still exposes primal values / an objective value in that case.
+    if opt.status not in ("optimal", "feasible", "suboptimal", "time_limit"):
+        raise RuntimeError(f"ftINIT MILP did not solve (status: {opt.status}).")
+
+    # RAVEN: a reaction is "on" iff its indicator ≥ 0.5 (positive indicators are
+    # continuous and can land fractionally when a reaction can carry only tiny flux).
+    on = {rid for rid, (ind, _) in indicators.items() if (ind.primal or 0.0) >= 0.5}
+    kept = free_or_essential | on
+    deleted = [r.id for r in model.reactions if r.id not in kept]
+    # Net flux per reaction: v for single-variable reactions, vp − vn for split ones.
+    fluxes = {
+        rid: sum(sign * (var.primal or 0.0) for var, sign in terms)
+        for rid, terms in flux_terms.items()
+    }
+
+    out = model.copy()
+    out.remove_reactions(deleted, remove_orphans=True)
+    return FtInitResult(out, sorted(kept), sorted(deleted), fluxes,
+                        float(opt.objective.value), on_reactions=on)
+
+
+def ftinit(
+    prep,
+    rxn_scores: Mapping[str, float],
+    *,
+    gene_scores: Mapping[str, float] | None = None,
+    series: str = "1+1",
+    steps=None,
+    fill_gaps: bool = True,
+    metabolomics: Iterable[str] | None = None,
+    force_on: float = _FORCE_ON,
+    big_m: float = _BIG_M,
+    mip_gap: float | None = None,
+    time_limit: float | None = None,
+) -> cobra.Model:
+    """Run the full ftINIT pipeline on prepData and return the context-specific model.
+
+    ``prep`` is a :class:`raven_python.init.PrepData`. ``rxn_scores`` maps **original**
+    reaction id → score (e.g. from :func:`score_reactions_from_genes` on the template).
+    Each step (:func:`raven_python.init.get_init_steps`) regroups scores under its
+    ``ignore_mask``, fixes the reactions turned on by earlier steps as essential (in
+    their flux direction), and solves :func:`run_ftinit` on the merged model. Reactions
+    never turned on (and not essential or left-in) are removed from the reference model;
+    exchange reactions are always kept (RAVEN re-adds them).
+
+    If ``fill_gaps`` and ``prep`` carries tasks, reactions are added back so every task
+    is feasible (:func:`raven_python.init.fill_tasks`). If ``gene_scores`` is given,
+    negative-scoring genes are pruned from the GPRs at the end
+    (:func:`raven_python.init.remove_low_score_genes`).
+
+    Essential reactions are forced to carry ``force_on`` (default 0.1) of flux in the
+    forced direction. On genome-scale models a stricter regime is needed (the previous
+    step's actual carried flux instead of a flat 0.1) — exposed via per-reaction
+    ``essential_force`` on :func:`run_ftinit`.
+
+    ``metabolomics`` (a list of detected metabolite names to reward producing) is
+    **not yet implemented**: the linear merge eliminates degree-2 detected metabolites,
+    so it needs a producer-group-mapping + negative-producer force-flux block — the
+    most intricate MILP piece, for the least-used input. Passing a non-empty value
+    raises ``NotImplementedError``.
+
+    ``mip_gap``/``time_limit`` are forwarded to each :func:`run_ftinit` solve. On
+    genome-scale models they are essential for tractability — see
+    ``docs/init_param_calibration.md`` for the calibration table.
+    """
+    if metabolomics:
+        raise NotImplementedError(
+            "metabolomics production-bonus is not yet implemented."
+        )
+    steps = steps if steps is not None else get_init_steps(series)
+    min_model, group_of = prep.min_model, prep.group_of
+
+    turned_on: dict[str, float] = {}   # merged reaction id -> flux (accumulated)
+    left_in: set[str] = set()          # merged reactions with score 0 in the last step
+    for step in steps:
+        to_zero = prep.masks.ignored(step.ignore_mask)
+        scores = group_rxn_scores(min_model, rxn_scores, prep.orig_rxn_ids,
+                                  prep.group_ids, to_zero)
+        essential = set(prep.essential_rxns)  # pre-oriented forward (default direction)
+        directions: dict[str, int] = {}
+        ess_force: dict[str, float] = {}
+        if step.how_to_use_prev == "essential":
+            for rid, flux in turned_on.items():
+                essential.add(rid)
+                directions[rid] = 1 if flux >= 0 else -1
+                # never force more flux than the reaction carried before (RAVEN)
+                ess_force[rid] = min(abs(flux) * 0.99, force_on)
+        res = run_ftinit(
+            min_model, scores, essential_rxns=essential, essential_directions=directions,
+            essential_force=ess_force, allow_excretion=step.allow_met_secr,
+            rem_pos_rev=step.pos_rev_off, ignore_mets=step.mets_to_ignore,
+            force_on=force_on, force_on_ess=force_on, big_m=big_m,
+            mip_gap=mip_gap, time_limit=time_limit,
+        )
+        for rid in res.on_reactions:
+            turned_on[rid] = res.fluxes[rid]
+        left_in = {rid for rid, s in scores.items() if s == 0.0}
+
+    # Merged reactions to keep: turned on + permanently essential + left-in (score 0).
+    kept_min = set(turned_on) | set(prep.essential_rxns) | left_in
+    deleted_min = [r.id for r in min_model.reactions if r.id not in kept_min]
+
+    # Map deleted merged reactions back to all originals in their groups.
+    removed_groups = {group_of[rid] for rid in deleted_min if group_of[rid] != 0}
+    to_remove = {o for o in prep.orig_rxn_ids if group_of[o] and group_of[o] in removed_groups}
+    to_remove |= {rid for rid in deleted_min if group_of[rid] == 0}  # unmerged
+    # Keep the surviving originals plus all exchange reactions (always re-added).
+    final_kept = (set(prep.orig_rxn_ids) - to_remove) | prep.masks.exchange
+
+    out = prep.ref_model.copy()
+    out.remove_reactions([r.id for r in out.reactions if r.id not in final_kept],
+                         remove_orphans=True)
+
+    if fill_gaps and prep.tasks:  # add reactions back so every task is feasible
+        out = fill_tasks(out, prep.ref_model, prep.tasks, rxn_scores=rxn_scores,
+                         mip_gap=mip_gap, time_limit=time_limit).model
+    if gene_scores is not None:   # prune negative-scoring genes from the GPRs
+        out, _ = remove_low_score_genes(out, gene_scores)
+    return out
diff --git a/src/raven_python/init/genes.py b/src/raven_python/init/genes.py
new file mode 100644
index 0000000..ceed3da
--- /dev/null
+++ b/src/raven_python/init/genes.py
@@ -0,0 +1,85 @@
+"""Prune low-scoring genes from a model — the last ftINIT step.
+
+Drop negative-scoring genes from each reaction's GPR, while
+respecting enzyme structure — genes joined by **OR** (isozymes) are candidates for
+removal, but at least one must remain (the least-negative if all are negative);
+genes joined by **AND** (complex subunits) are *not* removed individually, though a
+whole complex can be dropped as one isozyme alternative if its (aggregated) score is
+negative. Operates on cobra's GPR AST recursively, so nested rules like
+``G1 and (G2 or G3) and G4`` prune the inner isozyme group correctly.
+"""
+from __future__ import annotations
+
+import ast
+import statistics
+from collections.abc import Mapping
+
+import cobra
+from cobra.manipulation import remove_genes
+
+_AGG = {"min": min, "max": max, "median": statistics.median, "average": statistics.fmean}
+
+
+def _prune(node, scores, iso, cplx) -> tuple[str | None, float | None]:
+    """Return (pruned GPR string, aggregate score) for an AST node, or (None, None)."""
+    if isinstance(node, ast.Name):
+        return node.id, scores.get(node.id)  # None = unscored (NaN: never removed)
+    if not isinstance(node, ast.BoolOp):
+        return None, None
+
+    children = [_prune(v, scores, iso, cplx) for v in node.values]
+    children = [(s, sc) for s, sc in children if s is not None]
+
+    if isinstance(node.op, ast.And):  # complex: keep every subunit, prune nested ORs
+        kept = children
+    else:  # OR / isozymes: drop negative-scoring alternatives, keep at least one
+        kept = [(s, sc) for s, sc in children if sc is None or sc >= 0]
+        if not kept:  # all negative → keep the least-negative
+            kept = [max(children, key=lambda c: c[1])]
+
+    parts = [s for s, _ in kept]
+    score_vals = [sc for _, sc in kept if sc is not None]
+    agg = (cplx if isinstance(node.op, ast.And) else iso)
+    score = agg(score_vals) if score_vals else None
+    op = " and " if isinstance(node.op, ast.And) else " or "
+    text = parts[0] if len(parts) == 1 else "(" + op.join(parts) + ")"
+    return text, score
+
+
+def remove_low_score_genes(
+    model: cobra.Model,
+    gene_scores: Mapping[str, float],
+    *,
+    isozyme_scoring: str = "max",
+    complex_scoring: str = "min",
+) -> tuple[cobra.Model, list[str]]:
+    """Remove negative-scoring genes from GPRs (RAVEN ``removeLowScoreGenes``).
+
+    ``gene_scores`` maps gene id → score; genes absent from it are treated as unscored
+    (never removed). Returns ``(new_model, removed_gene_ids)`` — genes dropped from
+    *every* rule they were in (and thus from the model). ``isozyme_scoring`` /
+    ``complex_scoring`` aggregate alternative/subunit scores (``max``/``min`` default).
+
+    When all isozyme alternatives are negative the least-negative one is kept
+    **deterministically** (first on a tie), unlike RAVEN's random tie-break — same
+    quality, reproducible.
+    """
+    for name, value in (("isozyme_scoring", isozyme_scoring), ("complex_scoring", complex_scoring)):
+        if value not in _AGG:
+            raise ValueError(f"{name} must be one of {sorted(_AGG)}; got {value!r}.")
+    iso, cplx = _AGG[isozyme_scoring], _AGG[complex_scoring]
+
+    out = model.copy()
+    for rxn in out.reactions:
+        body = rxn.gpr.body
+        if body is None or not rxn.genes:
+            continue
+        pruned, _ = _prune(body, gene_scores, iso, cplx)
+        if pruned is not None:
+            rxn.gene_reaction_rule = pruned
+
+    used = {g.id for rxn in out.reactions for g in rxn.genes}
+    removed = sorted(g.id for g in out.genes if g.id not in used)
+    if removed:
+        remove_genes(out, removed, remove_reactions=False)
+    return out, removed
diff --git a/src/raven_python/init/init.py b/src/raven_python/init/init.py
new file mode 100644
index 0000000..f23e17a
--- /dev/null
+++ b/src/raven_python/init/init.py
@@ -0,0 +1,254 @@
+"""The INIT MILP — tINIT core.
+
+INIT (Agren et al., PLoS Comput Biol 2012) extracts a context-specific model: keep a
+flux-consistent subnetwork that maximises the summed score of *included* reactions
+(positive score = evidence to keep, negative = evidence to remove), optionally
+rewarding net production of metabolites.
+
+Formulation:
+
+* Reversible reactions are split into forward / reverse directed reactions (flux ≥ 0).
+* Each non-essential directed reaction gets a binary ``x`` (included ⇔ ``x=1``) with
+  ``eps·x ≤ v ≤ ub·x`` — included reactions must carry flux ≥ ``eps`` (connectivity),
+  excluded ones carry none.
+* Essential reactions (``essential_rxns``) are forced to carry flux (``v ≥ eps``) and
+  skip the binary.
+* ``no_rev_loops`` adds ``x_fwd + x_rev ≤ 1`` so a reversible reaction can't look
+  "connected" via an internal forward/back loop.
+* Steady state ``S·v = 0`` per metabolite; ``allow_excretion`` relaxes it to ``≥ 0``
+  (net production allowed). With ``prod_weight > 0`` a per-metabolite sink
+  ``s_m ∈ [0,1]`` is added and rewarded, giving a reason to include connectivity
+  reactions.
+* Objective: **maximise** ``Σ score·x + prod_weight·Σ s_m``.
+
+Needs a MILP solver (cobra's configured optlang solver). On genome-scale problems,
+Gurobi is the only backend that is fully usable today (see
+``docs/init_solver_benchmark.md``).
+
+**Parameter caveat — magic numbers are scale-dependent.** ``eps`` (the flux an
+included reaction must carry, default 1.0) and ``prod_weight`` (default 0.5) only make
+sense when reaction bounds are ~±1000 and scores are O(1); the right values depend on
+the model's flux magnitudes and the score distribution. The upper gate uses each
+reaction's own ``ub`` as the big-M by default (adapts to the model); pass ``big_m`` to
+override with a fixed cap for a tighter LP relaxation. Calibration tables live in
+``docs/init_param_calibration.md``.
+"""
+from __future__ import annotations
+
+from collections.abc import Iterable, Mapping
+from dataclasses import dataclass
+
+import cobra
+from optlang.symbolics import Real, add, mul
+
+_EPS = 1.0  # flux an included reaction must carry (RAVEN's fake-met unit)
+
+
+@dataclass
+class _Directed:
+    """One directed reaction in the split (irreversible) problem."""
+
+    key: str  # unique id of this directed part; ``<origin>__rev`` for a reverse half
+    origin: str  # original reaction id
+    coeffs: dict[str, float]  # met id -> stoichiometry (already sign-adjusted)
+    ub: float  # upper bound of this part's flux variable (the variable's lower bound is 0)
+    score: float  # score of the original reaction (0.0 when it has none)
+    essential: bool  # True -> forced to carry flux and given no include-binary
+
+
+@dataclass
+class InitResult:
+    """Result of :func:`run_init`."""
+
+    model: cobra.Model  # copy of the input model with ``deleted_reactions`` removed
+    deleted_reactions: list[str]  # sorted ids of the reactions that were not kept
+    met_production: dict[str, bool]  # present-met name -> producible?
+    objective: float  # objective value of the solved MILP
+
+
+def _split_reactions(
+    model: cobra.Model, scores: Mapping[str, float], essential: set[str]
+) -> list[_Directed]:
+    directed: list[_Directed] = []
+    for rxn in model.reactions:
+        score = float(scores.get(rxn.id, 0.0))
+        coeffs = {m.id: c for m, c in rxn.metabolites.items()}
+        rev_coeffs = {m: -c for m, c in coeffs.items()}
+        if rxn.id in essential:
+            # Force flux in a *single* direction (forward if it can run forward, else
+            # reverse) — like an irreversible essential reaction. Emitting both halves
+            # as essential would force fwd ≥ eps AND rev ≥ eps, i.e. a phantom
+            # eps-magnitude self-loop that can starve out the real pathway.
+            if rxn.upper_bound > 0:
+                directed.append(_Directed(rxn.id, rxn.id, coeffs, rxn.upper_bound, score, True))
+            else:
+                directed.append(_Directed(f"{rxn.id}__rev", rxn.id, rev_coeffs,
+                                          -rxn.lower_bound, score, True))
+            continue
+        if rxn.upper_bound > 0:
+            directed.append(_Directed(rxn.id, rxn.id, coeffs, rxn.upper_bound, score, False))
+        if rxn.lower_bound < 0:  # reverse direction as its own non-negative flux
+            directed.append(
+                _Directed(f"{rxn.id}__rev", rxn.id, rev_coeffs, -rxn.lower_bound, score, False)
+            )
+    return directed
+
+
+def run_init(
+    model: cobra.Model,
+    rxn_scores: Mapping[str, float] | None = None,
+    *,
+    present_mets: Iterable[str] | None = None,
+    essential_rxns: Iterable[str] | None = None,
+    prod_weight: float = 0.5,
+    allow_excretion: bool = False,
+    no_rev_loops: bool = False,
+    eps: float = _EPS,
+    big_m: float | None = None,
+    mip_gap: float | None = None,
+    time_limit: float | None = None,
+) -> InitResult:
+    """Run the INIT MILP and return the extracted model.
+
+    ``rxn_scores`` maps reaction id → score (default 0). ``essential_rxns`` must be
+    kept (forced to carry flux). ``present_mets`` are metabolite *names* that the
+    network should be able to produce; each is tested and reported in
+    ``met_production``. See the module docstring for the formulation.
+
+    Remaining options:
+
+    * ``prod_weight`` — objective weight of each per-metabolite sink; ``0`` adds no
+      sinks at all.
+    * ``allow_excretion`` — each mass balance is ``≥ 0`` instead of ``= 0``.
+    * ``no_rev_loops`` — at most one direction of a split reaction may be included.
+    * ``eps`` — flux an included directed reaction must carry; also the lower bound
+      put on essential reactions.
+    * ``big_m`` — when given, replaces each reaction's own upper bound as the cap in
+      the ``v ≤ cap·x`` gate.
+    * ``time_limit`` — converted with ``int()`` and set as the solver timeout.
+    * ``mip_gap`` — set through the Gurobi parameter interface on a best-effort
+      basis; any failure to set it is silently ignored.
+
+    Returns an :class:`InitResult`. A reaction is kept when any of its directed
+    parts is essential or has an include-indicator above 0.5; all other reactions
+    are removed from a *copy* of ``model``.
+
+    Raises ``RuntimeError`` if the solver status is not one of ``optimal``,
+    ``feasible``, ``suboptimal`` or ``time_limit``.
+
+    Note on score 0 (classic INIT vs. ftINIT divergence): in classic INIT a
+    reaction with score exactly 0 receives an include-indicator with **zero
+    reward**, so the optimiser is free to drop it. This matches RAVEN's
+    `runINIT` semantics. ftINIT inverts that — score-0 reactions are left free
+    (unscored, not removable) and always stay in the model — so a score of
+    exactly 0 means *different things* in the two variants. If you want score-0
+    reactions kept here, pass a small positive value (e.g. ``min_score`` from
+    `gene_scores_from_expression`) instead of 0.
+    """
+    scores = dict(rxn_scores or {})
+    essential = set(essential_rxns or [])
+    present = list(present_mets or [])
+
+    directed = _split_reactions(model, scores, essential)
+    prob = model.problem
+    opt = prob.Model()  # a fresh problem: the MILP is built from scratch, not on model.solver
+
+    # Flux variables for every directed reaction.
+    flux = {d.key: prob.Variable(f"v_{d.key}", lb=0.0, ub=d.ub) for d in directed}
+
+    # Binary include-indicators for non-essential reactions; eps*x <= v <= ub*x.
+    keep: dict[str, object] = {}
+    gates = []
+    for d in directed:
+        if d.essential:
+            # NOTE(review): the forced bound is not clamped to ``d.ub``; an ``eps`` above
+            # the reaction's capacity gives lb > ub. Confirm callers never pass that.
+            flux[d.key].lb = max(eps, 0.0)  # forced to carry flux
+            continue
+        x = prob.Variable(f"x_{d.key}", type="binary")
+        keep[d.key] = x
+        cap = d.ub if big_m is None else big_m  # big-M: per-reaction bound (default) or fixed
+        gates.append(prob.Constraint(flux[d.key] - cap * x, ub=0.0, name=f"ub_{d.key}"))
+        gates.append(prob.Constraint(flux[d.key] - eps * x, lb=0.0, name=f"lb_{d.key}"))
+
+    # no_rev_loops: at most one direction of a reversible reaction is included.
+    by_origin: dict[str, list[str]] = {}
+    for d in directed:
+        by_origin.setdefault(d.origin, []).append(d.key)
+    if no_rev_loops:
+        for keys in by_origin.values():
+            xs = [keep[k] for k in keys if k in keep]  # essential parts have no indicator
+            if len(xs) > 1:
+                gates.append(prob.Constraint(sum(xs), ub=1.0, name=f"onedir_{keys[0]}"))
+
+    # Steady-state constraints S·v (- sink) {==0 | >=0}, plus prod_weight sinks.
+    # Accumulate each metabolite's terms by iterating reactions once (avoids the
+    # O(mets·rxns) per-metabolite filter) and sum with optlang.symbolics.add — Python
+    # sum() re-canonicalises a growing sympy expression each step (O(n²)), which is
+    # minutes per hub metabolite at genome scale.
+    met_terms: dict[str, list] = {met.id: [] for met in model.metabolites}
+    for d in directed:
+        v = flux[d.key]
+        for mid, coeff in d.coeffs.items():
+            met_terms[mid].append(mul([Real(coeff), v]))
+
+    sinks: dict[str, object] = {}
+    met_constraints: dict[str, object] = {}
+    ub = None if allow_excretion else 0.0
+    for met in model.metabolites:
+        terms = met_terms[met.id]
+        if prod_weight != 0:
+            s = prob.Variable(f"s_{met.id}", lb=0.0, ub=1.0)
+            sinks[met.id] = s
+            terms = [*terms, mul([Real(-1.0), s])]  # net production drained into rewarded sink
+        if terms:  # no reaction terms and no sink -> no constraint for this metabolite
+            met_constraints[met.id] = prob.Constraint(add(terms), lb=0.0, ub=ub)
+
+    opt.add(list(flux.values()) + list(keep.values()) + list(sinks.values())
+            + gates + list(met_constraints.values()))
+
+    objective = prob.Objective(
+        add([mul([Real(d.score), keep[d.key]]) for d in directed if d.key in keep]
+            + [mul([Real(prod_weight), s]) for s in sinks.values()]),
+        direction="max",
+    )
+    opt.objective = objective
+
+    # Separate feasibility LPs on the full directed network; they do not use ``opt``.
+    met_production = _check_present_mets(prob, present, model, directed, allow_excretion)
+
+    if time_limit is not None:
+        opt.configuration.timeout = int(time_limit)  # fractional part is dropped
+    if mip_gap is not None:
+        try:  # Gurobi-specific; harmless if the backend differs
+            opt.problem.Params.MIPGap = mip_gap
+        except Exception:  # noqa: BLE001
+            pass
+    opt.optimize()
+    # With a MIP gap / time limit set, accept a near-optimal incumbent (as RAVEN does).
+    # NOTE(review): ``time_limit`` is accepted without checking that an incumbent exists;
+    # confirm the backend still exposes primal values / an objective value in that case.
+    if opt.status not in ("optimal", "feasible", "suboptimal", "time_limit"):
+        raise RuntimeError(f"INIT MILP did not solve (status: {opt.status}).")
+
+    # A reaction is kept if any of its directed parts is essential or has x≈1.
+    kept_origins = {d.origin for d in directed if d.essential}
+    kept_origins |= {d.origin for d in directed if d.key in keep and (keep[d.key].primal or 0) > 0.5}
+    deleted = [r.id for r in model.reactions if r.id not in kept_origins]
+
+    out = model.copy()
+    out.remove_reactions(deleted, remove_orphans=True)
+    return InitResult(out, sorted(deleted), met_production, float(opt.objective.value))
+
+
+def _check_present_mets(prob, present, model, directed, allow_excretion) -> dict[str, bool]:
+    """Whether each present metabolite (by name) can be net-produced at all.
+
+    A small LP per metabolite (no score/binary, so it's the LP relaxation, as RAVEN
+    does): all reactions available, steady state, and a demand draining ≥1 unit of
+    any compartment form of the metabolite — feasible ⇔ producible.
+    """
+    if not present:
+        return {}
+    name_to_ids: dict[str, list[str]] = {}
+    for met in model.metabolites:
+        name_to_ids.setdefault((met.name or met.id).upper(), []).append(met.id)
+
+    result: dict[str, bool] = {}
+    for name in present:
+        ids = name_to_ids.get(name.upper())
+        if not ids:
+            result[name] = False
+            continue
+        lp = prob.Model()
+        flux = {d.key: prob.Variable(f"v_{d.key}", lb=0.0, ub=d.ub) for d in directed}
+        drains = {mid: prob.Variable(f"drain_{mid}", lb=0.0, ub=1e6) for mid in ids}
+        terms: dict[str, list] = {met.id: [] for met in model.metabolites}
+        for d in directed:
+            v = flux[d.key]
+            for mid, c in d.coeffs.items():
+                terms[mid].append(mul([Real(c), v]))
+        for mid in drains:
+            terms[mid].append(mul([Real(-1.0), drains[mid]]))
+        cons = [prob.Constraint(add(t), lb=0.0, ub=None if allow_excretion else 0.0)
+                for t in terms.values() if t]
+        require = prob.Constraint(add(list(drains.values())), lb=1.0, name="_require_production")
+        lp.add(list(flux.values()) + list(drains.values()) + cons + [require])
+        lp.objective = prob.Objective(prob.Variable("_zero", lb=0, ub=0), direction="max")
+        lp.optimize()
+        result[name] = lp.status == "optimal"
+    return result
diff --git a/src/raven_python/init/merge.py b/src/raven_python/init/merge.py
new file mode 100644
index 0000000..a26f41c
--- /dev/null
+++ b/src/raven_python/init/merge.py
@@ -0,0 +1,226 @@
+"""Linear reaction merging for ftINIT.
+
+ftINIT shrinks the MILP losslessly by **contracting linear reaction chains**: a
+metabolite that appears in exactly two reactions (one net producer, one net consumer)
+links them into a single combined reaction. Iterating this collapses unbranched
+pathways — on Human-GEM ~12k → ~8k reactions, a ~⅓ smaller MILP — without changing
+the feasible flux space. Reversible reactions may merge too (unlike
+``simplifyModel``'s merge), which is why ftINIT ships its own.
+
+:func:`merge_linear` returns the reduced model plus the bookkeeping needed to map
+scores and results back to the original reactions:
+
+* ``group_ids`` — one integer per original reaction; ``0`` = not merged, equal
+  non-zero integers = merged into the same combined reaction (which keeps one
+  member's id).
+* ``reversed_rxns`` — which originals were flipped (their stored direction negated)
+  when oriented for merging; needed to map fluxes/directions back.
+
+:func:`group_rxn_scores` then sums the original per-reaction scores over each group,
+with RAVEN's zero-handling (see its docstring): genuine 0 → 0.01, ignore-masked → 0,
+a group cancelling to 0 with non-zero members → 0.01 — all so the MILP never sees an
+exactly-zero score (whose on/off would be arbitrary).
+"""
+from __future__ import annotations
+
+import math
+from collections import defaultdict
+from collections.abc import Iterable, Mapping
+
+import cobra
+
+# Coefficients of a merged reaction whose magnitude is at or below this are dropped as zero.
+_TOL = 1e-12
+
+
+class _Rxn:
+    """Scratch record for one reaction while chains are being contracted.
+
+    Carries the id, the name, the ``{metabolite id: coefficient}`` map and the two
+    flux bounds; the merge reassigns these attributes in place.
+    """
+
+    __slots__ = ("id", "name", "coeffs", "lb", "ub")
+
+    def __init__(self, rid, name, coeffs, lb, ub):
+        self.id = rid
+        self.name = name
+        self.coeffs = coeffs
+        self.lb = lb
+        self.ub = ub
+
+    @property
+    def reversible(self) -> bool:
+        """Whether the lower bound is negative (the stand-in for RAVEN's ``rev`` flag)."""
+        lower = self.lb
+        return lower < 0
+
+
+def merge_linear(
+    model: cobra.Model, no_merge: Iterable[str] = ()
+) -> tuple[cobra.Model, list[str], list[int], list[bool]]:
+    """Merge linearly-dependent reactions; return ``(reduced, orig_ids, group_ids, reversed)``.
+
+    ``no_merge`` reaction ids are never merged. The reduced model carries no genes
+    (merging makes GPRs meaningless); scores are remapped with
+    :func:`group_rxn_scores`.
+
+    Each pass recomputes the metabolite→reaction incidence fresh, then merges over the
+    degree-2 metabolites found at the start of the pass. A metabolite that only
+    *becomes* degree-2 mid-pass (because one of its reactions was just merged into a
+    survivor) is therefore picked up on the next pass rather than immediately — linear
+    merging is confluent, so the final grouping is the same regardless, it just takes a
+    few extra passes on long chains. (RAVEN re-finds incidence per metabolite and so
+    finishes a chain in one pass; the end result is equivalent.)
+
+    The three returned lists are parallel, in ``model.reactions`` order: the original
+    reaction ids, each one's group label (``0`` = never merged), and whether its
+    direction was negated while orienting a merge. ``model`` itself is only read; the
+    work happens on private :class:`_Rxn` copies.
+
+    Raises ``RuntimeError`` if a pair that passed the producer/consumer screen still
+    cannot be oriented as producer → consumer.
+    """
+    banned = set(no_merge)
+    orig_ids = [r.id for r in model.reactions]
+    # Bookkeeping keyed by ORIGINAL reaction id; label 0 means "not merged (yet)".
+    group_of: dict[str, int] = {rid: 0 for rid in orig_ids}
+    reversed_of: dict[str, bool] = {rid: False for rid in orig_ids}
+    next_group = 1
+
+    rxns = [
+        _Rxn(r.id, r.name, {m.id: c for m, c in r.metabolites.items()},
+             r.lower_bound, r.upper_bound)
+        for r in model.reactions
+    ]
+
+    # Negate a working reaction (stoichiometry and bounds) and toggle the reversed flag
+    # of every original it already stands for — its whole group, or just itself.
+    def flip(rx: _Rxn) -> None:
+        rx.coeffs = {m: -c for m, c in rx.coeffs.items()}
+        rx.lb, rx.ub = -rx.ub, -rx.lb
+        grp = group_of[rx.id]
+        targets = [o for o in orig_ids if group_of[o] == grp] if grp else [rx.id]
+        for o in targets:
+            reversed_of[o] = not reversed_of[o]
+
+    # Move a working reaction — and every original sharing its current label — to `grp`.
+    def relabel(rx: _Rxn, grp: int) -> None:
+        old = group_of[rx.id]
+        if old == grp:
+            return
+        if old == 0:
+            group_of[rx.id] = grp
+        else:
+            for o in orig_ids:
+                if group_of[o] == old:
+                    group_of[o] = grp
+
+    while True:
+        # Incidence is a snapshot: indices into `rxns` as it stands at the start of the pass.
+        incidence: dict[str, list[int]] = defaultdict(list)
+        for i, rx in enumerate(rxns):
+            for m in rx.coeffs:
+                incidence[m].append(i)
+        degree2 = [m for m, ii in incidence.items() if len(ii) == 2]
+
+        merged_some = False
+        for met in degree2:
+            # Re-check against live coefficients: an earlier merge in this pass may have
+            # emptied one of the two reactions or cancelled `met` out of it.
+            involved = [i for i in incidence[met] if met in rxns[i].coeffs]
+            if len(involved) != 2:
+                continue  # one side already merged away this pass
+            a, b = involved
+            if rxns[a].id in banned or rxns[b].id in banned:
+                continue
+            ca, cb = rxns[a].coeffs[met], rxns[b].coeffs[met]
+            ra, rb = rxns[a].reversible, rxns[b].reversible
+            # Booleans add as 0/1: how many of the two can act as producer / consumer
+            # of `met`, counting a reversible reaction as capable of both.
+            pos = (ca > 0 or ra) + (cb > 0 or rb)
+            neg = (ca < 0 or ra) + (cb < 0 or rb)
+            if pos < 1 or neg < 1:
+                continue  # need one producer and one consumer
+
+            r1, r2 = a, b
+            # Special case: rev producer first, irrev producer second → swap (RAVEN l.74).
+            if rxns[r1].reversible and not rxns[r2].reversible \
+                    and rxns[r1].coeffs[met] > 0 and rxns[r2].coeffs[met] > 0:
+                r1, r2 = r2, r1
+            # Make r1 the producer of `met`.
+            if rxns[r1].coeffs[met] < 0:
+                if rxns[r2].coeffs[met] > 0:
+                    r1, r2 = r2, r1
+                elif rxns[r1].reversible:
+                    flip(rxns[r1])
+                elif rxns[r2].reversible:
+                    flip(rxns[r2])
+                    r1, r2 = r2, r1
+                else:
+                    raise RuntimeError("mergeLinear: no producer orientation possible.")
+            # Make r2 the consumer.
+            if rxns[r2].coeffs[met] > 0:
+                if rxns[r2].reversible:
+                    flip(rxns[r2])
+                else:
+                    raise RuntimeError("mergeLinear: no consumer orientation possible.")
+
+            # Combined reaction = r1 + ratio·r2, with ratio chosen so `met` cancels.
+            ratio = abs(rxns[r1].coeffs[met] / rxns[r2].coeffs[met])
+            merged = defaultdict(float, rxns[r1].coeffs)
+            for m, c in rxns[r2].coeffs.items():
+                merged[m] += c * ratio
+            merged[met] = 0.0  # force an exact zero so the tolerance filter below drops it
+            rxns[r1].coeffs = {m: c for m, c in merged.items() if abs(c) > _TOL}
+
+            # Most-constraining bounds win (RAVEN scales r2's bounds by the ratio).
+            if not math.isinf(rxns[r2].lb):
+                rxns[r1].lb = max(rxns[r1].lb, rxns[r2].lb / ratio)
+            if not math.isinf(rxns[r2].ub):
+                rxns[r1].ub = min(rxns[r1].ub, rxns[r2].ub / ratio)
+            rxns[r2].coeffs = {}  # cleared → removed after the pass
+
+            # Reuse the larger label either side already has; otherwise open a new group.
+            grp = max(group_of[rxns[r1].id], group_of[rxns[r2].id]) or next_group
+            if grp == next_group:
+                next_group += 1
+            relabel(rxns[r1], grp)
+            relabel(rxns[r2], grp)
+            merged_some = True
+
+        if not merged_some:
+            break
+        # Drop every working reaction left without coefficients (absorbed or fully cancelled).
+        rxns = [rx for rx in rxns if rx.coeffs]
+
+    return _build_model(model, rxns), orig_ids, [group_of[o] for o in orig_ids], \
+        [reversed_of[o] for o in orig_ids]
+
+
+def _build_model(template: cobra.Model, rxns: list[_Rxn]) -> cobra.Model:
+    """Turn the surviving working reactions into a fresh, gene-free cobra model.
+
+    The new model takes ``template``'s id. Only metabolites that some working reaction
+    still references are copied over (id, name, compartment, formula), in the order
+    they appear in ``template``.
+    """
+    reduced = cobra.Model(template.id)
+    referenced: set[str] = set()
+    for rx in rxns:
+        referenced.update(rx.coeffs)
+
+    kept_mets = []
+    for met in template.metabolites:  # walk the template so its ordering carries over
+        if met.id in referenced:
+            kept_mets.append(cobra.Metabolite(met.id, name=met.name,
+                                              compartment=met.compartment,
+                                              formula=met.formula))
+    reduced.add_metabolites(kept_mets)
+
+    built = [cobra.Reaction(rx.id, name=rx.name, lower_bound=rx.lb, upper_bound=rx.ub)
+             for rx in rxns]
+    reduced.add_reactions(built)
+    # Stoichiometry is attached after the reactions are in the model.
+    for rx, reaction in zip(rxns, built, strict=True):
+        stoich = {reduced.metabolites.get_by_id(mid): coeff for mid, coeff in rx.coeffs.items()}
+        reaction.add_metabolites(stoich)
+    return reduced
+
+
+def group_rxn_scores(
+    reduced_model: cobra.Model,
+    orig_scores: Mapping[str, float],
+    orig_rxn_ids: list[str],
+    group_ids: list[int],
+    to_zero: Iterable[str] = (),
+) -> dict[str, float]:
+    """Score each reduced reaction by summing its group's scores (RAVEN ``groupRxnScores``).
+
+    ``orig_scores`` is keyed by original reaction id (a missing id counts as 0);
+    ``orig_rxn_ids`` and ``group_ids`` are the parallel lists from :func:`merge_linear`;
+    ``to_zero`` lists the reactions masked out of the problem (``toIgnore``).
+
+    Per original reaction: an exact 0 is bumped to 0.01, then a masked reaction is set
+    to 0. An unmerged reduced reaction takes its own adjusted score; a merged one takes
+    the sum over its group, bumped to 0.01 when that sum is exactly 0 although some
+    member was non-zero. Returns ``{reduced_reaction_id: score}``.
+    """
+    masked = set(to_zero)
+    group_of = dict(zip(orig_rxn_ids, group_ids, strict=True))
+
+    adjusted: dict[str, float] = {}
+    by_group: dict[int, list[str]] = {}
+    for rid in orig_rxn_ids:
+        raw = float(orig_scores.get(rid, 0.0))
+        if raw == 0.0:
+            raw = 0.01  # a genuine zero would leave the reaction's on/off arbitrary
+        adjusted[rid] = 0.0 if rid in masked else raw
+        gid = group_of[rid]
+        if gid != 0:  # member lists are only needed for merged groups
+            by_group.setdefault(gid, []).append(rid)
+
+    result: dict[str, float] = {}
+    for rxn in reduced_model.reactions:
+        rid = rxn.id
+        gid = group_of[rid]
+        if gid == 0:
+            result[rid] = adjusted[rid]
+            continue
+        parts = [adjusted[member] for member in by_group[gid]]
+        total = sum(parts)
+        if total == 0.0 and any(part != 0.0 for part in parts):
+            total = 0.01  # members cancelled each other out
+        result[rid] = total
+    return result
diff --git a/src/raven_python/init/prep.py b/src/raven_python/init/prep.py
new file mode 100644
index 0000000..8ed4b89
--- /dev/null
+++ b/src/raven_python/init/prep.py
@@ -0,0 +1,241 @@
+"""ftINIT preprocessing — once-per-template work shared by every sample on a model.
+
+ftINIT does all omics-independent work once: classify reactions into the categories
+the staged MILP may *ignore* (leave in, never remove), discover task-essential
+reactions, linearly merge, and scale. The result (:class:`PrepData`) is reused across
+every sample.
+
+:func:`classify_reactions` is the reaction taxonomy: exchange, GPR-less
+import / simple / advanced transport, spontaneous, GPR-less extracellular, custom, and
+"any without a GPR". The staged schedule (:func:`raven_python.init.get_init_steps`) selects
+which categories to keep out of each MILP step via an 8-bit pattern.
+"""
+from __future__ import annotations
+
+from collections.abc import Iterable
+from dataclasses import dataclass, field
+
+import cobra
+
+from raven_python.init.merge import merge_linear
+from raven_python.tasks import Task, find_task_essential_reactions
+
+
+@dataclass
+class ReactionMasks:
+    """Sets of reaction ids per category (RAVEN's ``toIgnore*``).
+
+    The fields are declared in the same order as the flags of the 8-bit step pattern,
+    so flag *k* of a pattern refers to field *k* here. :meth:`ignored` turns a pattern
+    into the set of reactions a MILP step holds out (leaves untouched).
+    """
+
+    exchange: set[str] = field(default_factory=set)            # b1
+    import_rxns: set[str] = field(default_factory=set)         # b2
+    simple_transport: set[str] = field(default_factory=set)    # b3
+    advanced_transport: set[str] = field(default_factory=set)  # b4
+    spontaneous: set[str] = field(default_factory=set)         # b5
+    extracellular: set[str] = field(default_factory=set)       # b6 (no-GPR, all mets in ext comp)
+    custom: set[str] = field(default_factory=set)              # b7
+    no_gpr: set[str] = field(default_factory=set)              # b8
+
+    def _ordered(self) -> list[set[str]]:
+        """The eight category sets, b1 first."""
+        names = ("exchange", "import_rxns", "simple_transport", "advanced_transport",
+                 "spontaneous", "extracellular", "custom", "no_gpr")
+        return [getattr(self, name) for name in names]
+
+    def ignored(self, pattern: Iterable[int]) -> set[str]:
+        """Union of the categories whose flag in ``pattern`` is truthy.
+
+        ``pattern`` must supply exactly eight flags; ``zip(..., strict=True)`` raises
+        ``ValueError`` otherwise. The result is a new set.
+        """
+        selected = [category
+                    for flag, category in zip(pattern, self._ordered(), strict=True)
+                    if flag]
+        return set().union(*selected)
+
+
+def _is_advanced_transport(rxn: cobra.Reaction) -> bool:
+    """Whether ``rxn`` moves several metabolites between compartments and nothing else.
+
+    True only when the reaction has an even number of metabolites greater than two and
+    they split into pairs that share a name, sit in different compartments, and have
+    coefficients summing to exactly zero. Each metabolite must have exactly one partner.
+    """
+    entries = [(met.name, met.compartment, coeff) for met, coeff in rxn.metabolites.items()]
+    count = len(entries)
+    if count <= 2 or count % 2:
+        return False
+    while entries:
+        (name, comp, coeff), rest = entries[0], entries[1:]
+        partners = [idx for idx, entry in enumerate(rest) if entry[0] == name]
+        if len(partners) != 1:
+            return False
+        _, partner_comp, partner_coeff = rest.pop(partners[0])
+        if coeff + partner_coeff != 0 or comp == partner_comp:
+            return False
+        entries = rest  # both halves of the pair are gone; continue with what is left
+    return True
+
+
+def classify_reactions(
+    model: cobra.Model,
+    *,
+    ext_comp: str = "e",
+    spontaneous: Iterable[str] = (),
+    custom: Iterable[str] = (),
+) -> ReactionMasks:
+    """Sort the model's reactions into the ftINIT ``toIgnore`` categories (``prepINITModel``).
+
+    ``ext_comp`` names the extracellular compartment; ``spontaneous`` and ``custom``
+    are reaction ids supplied by the caller (ids absent from the model are dropped).
+    "GPR-less" means the gene rule is empty after stripping whitespace. Exchange
+    reactions are ``model.boundary``. The remaining categories apply to GPR-less
+    reactions only:
+
+    * two metabolites with the same name in different compartments — *import* when one
+      side is ``ext_comp``, otherwise *simple transport*;
+    * any other metabolite count passing :func:`_is_advanced_transport` — *advanced
+      transport*;
+    * more than one metabolite, all of them in ``ext_comp`` — *extracellular*.
+    """
+    spont_ids = set(spontaneous)
+    custom_ids = set(custom)
+    masks = ReactionMasks(exchange={r.id for r in model.boundary})
+
+    for rxn in model.reactions:
+        rid = rxn.id
+        if rid in spont_ids:
+            masks.spontaneous.add(rid)
+        if rid in custom_ids:
+            masks.custom.add(rid)
+        if rxn.gene_reaction_rule.strip():
+            continue  # everything below is for GPR-less reactions only
+        masks.no_gpr.add(rid)
+
+        mets = list(rxn.metabolites)
+        if len(mets) == 2:
+            first, second = mets
+            if first.name == second.name and first.compartment != second.compartment:
+                crosses_ext = ext_comp in (first.compartment, second.compartment)
+                target = masks.import_rxns if crosses_ext else masks.simple_transport
+                target.add(rid)
+        elif _is_advanced_transport(rxn):
+            masks.advanced_transport.add(rid)
+        if len(mets) > 1 and all(met.compartment == ext_comp for met in mets):
+            masks.extracellular.add(rid)
+    return masks
+
+
+@dataclass
+class PrepData:
+    """Template-level ftINIT preprocessing result (RAVEN ``prepData``).
+
+    Computed once per template and shared by every sample. ``ref_model`` is the
+    pre-merge reference and ``min_model`` the merged model the MILP is solved on;
+    ``orig_rxn_ids`` / ``group_ids`` / ``reversed_rxns`` are the parallel lists that
+    tie ``min_model`` reactions back to ``ref_model``. ``essential_rxns`` holds
+    **merged-model** ids, already oriented so the MILP can force them *forward*.
+    ``masks`` uses ``ref_model`` (= original) ids.
+    """
+
+    ref_model: cobra.Model
+    min_model: cobra.Model
+    orig_rxn_ids: list[str]
+    group_ids: list[int]
+    reversed_rxns: list[bool]
+    masks: ReactionMasks
+    essential_rxns: set[str] = field(default_factory=set)
+    essential_mets_for_tasks: set[str] = field(default_factory=set)
+    tasks: list[Task] = field(default_factory=list)
+
+    @property
+    def group_of(self) -> dict[str, int]:
+        """``{original reaction id: group id}``, rebuilt from the parallel lists on each access."""
+        pairs = zip(self.orig_rxn_ids, self.group_ids, strict=True)
+        return {rid: gid for rid, gid in pairs}
+
+
+def rescale_for_init(model: cobra.Model, max_stoich_diff: float = 25.0) -> None:
+    """Squeeze the spread of stoichiometric coefficients inside every reaction, in place.
+
+    A reaction mixing very small and very large coefficients (a biomass or pool
+    reaction, say) pushes fluxes to matching extremes, so no single MILP big-M suits
+    all reactions. For each non-empty reaction this clips every ``|coeff|`` to at most
+    ``max_stoich_diff × min|coeff|`` (sign kept) and then rescales the reaction so the
+    mean ``|coeff|`` is 1. Afterwards every positive upper bound becomes ``1000`` and
+    every negative lower bound ``-1000``; other bounds are left alone. Intended for the
+    merged MILP model only — results are mapped back to the unscaled ``ref_model`` by
+    reaction id, so which reactions get selected is unaffected.
+    """
+    for rxn in model.reactions:
+        stoich = list(rxn.metabolites.items())
+        if not stoich:
+            continue
+        limit = max_stoich_diff * min(abs(coeff) for _, coeff in stoich)
+        clipped = {}
+        for met, coeff in stoich:
+            if abs(coeff) > limit:
+                coeff = limit if coeff > 0 else -limit
+            clipped[met] = coeff
+        magnitude = sum(abs(coeff) for coeff in clipped.values())
+        factor = len(clipped) / magnitude if magnitude else 1.0
+        # combine=False overwrites the stored coefficients instead of adding to them.
+        rxn.add_metabolites({met: coeff * factor for met, coeff in clipped.items()},
+                            combine=False)
+
+    # Second sweep: put the open side(s) of every reaction back on the standard ±1000.
+    for rxn in model.reactions:
+        if rxn.upper_bound > 0:
+            rxn.upper_bound = 1000.0
+        if rxn.lower_bound < 0:
+            rxn.lower_bound = -1000.0
+
+
+def _orient_forward(rxn: cobra.Reaction, direction: int) -> None:
+    """Restrict ``rxn`` to its forced direction, expressed as forward-only flux.
+
+    A negative ``direction`` first mirrors the reaction (stoichiometry and bounds
+    negated) so the forced direction becomes the forward one; in every case the lower
+    bound is then raised to at least 0. Mutates ``rxn`` in place.
+    """
+    if direction < 0:
+        # Adding -2·c on top of the existing c leaves -c: the stoichiometry is negated.
+        negation = {met: -2 * coeff for met, coeff in rxn.metabolites.items()}
+        rxn.add_metabolites(negation)
+        old_lb, old_ub = rxn.lower_bound, rxn.upper_bound
+        rxn.bounds = (-old_ub, -old_lb)
+    rxn.lower_bound = max(rxn.lower_bound, 0.0)
+
+
+def prep_init_model(
+    template: cobra.Model,
+    tasks: Iterable[Task] | None = None,
+    *,
+    ext_comp: str = "e",
+    spontaneous: Iterable[str] = (),
+    custom: Iterable[str] = (),
+    essential_cache_path=None,
+    scale: bool = True,
+) -> PrepData:
+    """Run the once-per-template preprocessing and package it as :class:`PrepData`.
+
+    Works on a copy of ``template``. When ``tasks`` is given, the task-essential
+    reactions are discovered (they are kept regardless of score) and made irreversible
+    in their required direction, and tasks reported as failed are dropped. The copy is
+    then classified into the omics-independent categories (``ext_comp``,
+    ``spontaneous`` and ``custom`` are passed to :func:`classify_reactions`), linearly
+    merged, and — unless ``scale=False`` — the merged model's stoichiometry is rescaled
+    with :func:`rescale_for_init` so one MILP big-M is valid for all reactions; without
+    that, genome-scale ftINIT is infeasible / intractable.
+
+    ``essential_cache_path`` is handed to :func:`find_task_essential_reactions`, which
+    uses it to make the slow essential-reaction discovery **resumable**.
+    """
+    ref_model = template.copy()
+
+    forced_direction: dict[str, int] = {}
+    task_mets: set[str] = set()
+    feasible_tasks: list[Task] = []
+    if tasks is not None:
+        task_list = list(tasks)
+        found = find_task_essential_reactions(ref_model, task_list,
+                                              cache_path=essential_cache_path)
+        forced_direction = found.reactions
+        task_mets = found.task_metabolites
+        feasible_tasks = [task for task in task_list if task.id not in found.failed_tasks]
+
+    # Essentials become forward-only BEFORE merging: the merge then cannot flip them,
+    # and the MILP can force them with a plain lower bound.
+    for rid, direction in forced_direction.items():
+        _orient_forward(ref_model.reactions.get_by_id(rid), direction)
+
+    masks = classify_reactions(ref_model, ext_comp=ext_comp,
+                               spontaneous=spontaneous, custom=custom)
+
+    min_model, orig_ids, group_ids, reversed_rxns = merge_linear(ref_model)
+    if scale:  # narrow the coefficient range so one big-M covers every reaction
+        rescale_for_init(min_model)
+    group_of = dict(zip(orig_ids, group_ids, strict=True))
+
+    # Group label → id of the reaction that represents the group in the merged model.
+    survivors: dict[int, str] = {}
+    for rxn in min_model.reactions:
+        gid = group_of[rxn.id]
+        if gid:
+            survivors[gid] = rxn.id
+
+    # Translate essentials to merged ids. An unmerged essential keeps its own id; a
+    # merged one is represented by its group's survivor. A group that collapsed away
+    # entirely (e.g. a trivial source→sink chain) has no survivor and adds no constraint.
+    essential_ids: set[str] = set()
+    for rid in forced_direction:
+        gid = group_of[rid]
+        if not gid:
+            essential_ids.add(rid)
+        elif gid in survivors:
+            essential_ids.add(survivors[gid])
+
+    return PrepData(
+        ref_model=ref_model,
+        min_model=min_model,
+        orig_rxn_ids=orig_ids,
+        group_ids=group_ids,
+        reversed_rxns=reversed_rxns,
+        masks=masks,
+        essential_rxns=essential_ids,
+        essential_mets_for_tasks=task_mets,
+        tasks=feasible_tasks,
+    )
diff --git a/src/raven_python/init/score.py b/src/raven_python/init/score.py
new file mode 100644
index 0000000..6e14f86
--- /dev/null
+++ b/src/raven_python/init/score.py
@@ -0,0 +1,86 @@
+"""Score reactions from gene scores via the GPR.
+
+Maps per-gene scores (e.g. expression-derived: present → positive, absent → negative)
+to per-reaction scores by walking each reaction's GPR: genes joined by **OR**
+(isozymes) are combined with ``isozyme_scoring`` (default ``max``); genes joined by
+**AND** (complexes) with ``complex_scoring`` (default ``min``). Genes missing from
+``gene_scores`` are *omitted*; a reaction with no genes — or whose genes are all
+missing — gets ``no_gene_score`` (default −2). These reaction scores feed
+:func:`raven_python.init.run_init` and :func:`raven_python.init.ftinit`.
+
+Upstream — the omics-data → gene-score step (thresholding, expression levels) — lives
+in :mod:`raven_python.omics`; this function takes gene scores as given.
+"""
+from __future__ import annotations
+
+import ast
+import math
+import statistics
+from collections.abc import Mapping
+
+import cobra
+
+# Aggregator name (the values accepted for ``isozyme_scoring`` / ``complex_scoring``)
+# → the callable applied to the list of child scores at an OR / AND node.
+_AGG = {"min": min, "max": max, "median": statistics.median, "average": statistics.fmean}
+
+
+def gene_scores_from_expression(
+    expression: Mapping[str, float],
+    reference: Mapping[str, float] | float,
+    *,
+    factor: float = 5.0,
+    max_score: float = 10.0,
+    min_score: float = -5.0,
+) -> dict[str, float]:
+    """Gene scores from RNA-seq/array expression, RAVEN's ``5·ln(level/reference)``.
+
+    This is tINIT's usual entry point (RNA-seq is the common case; single-cell and
+    HPA are alternative upstream sources). ``reference`` is either a per-gene
+    reference level (e.g. the cross-sample mean) or a single threshold for all genes:
+    a gene expressed above its reference scores positive, below it negative.
+
+    ``expression`` maps gene → level. ``reference`` is treated as per-gene when it
+    offers a ``.get`` method (dicts and other mapping-likes) and as one shared
+    threshold otherwise, so any plain number works as a scalar. The score is
+    ``factor · ln(level / reference)`` clamped to ``[min_score, max_score]``.
+
+    A gene gets ``min_score`` whenever the ratio has no usable logarithm: level or
+    reference missing (``None``), NaN, zero or negative, or a ratio that underflows to
+    zero (RAVEN maps these NaNs to -5). The returned dict always contains every gene
+    of ``expression`` and never contains NaN.
+    """
+    per_gene = hasattr(reference, "get")
+    scores: dict[str, float] = {}
+    for gene, level in expression.items():
+        ref = reference.get(gene) if per_gene else reference
+        # ``not (x > 0)`` instead of ``x <= 0``: every comparison with NaN is False, so
+        # only the negated form routes NaN to the floor along with zero and negatives.
+        if level is None or ref is None or not (level > 0 and ref > 0):
+            scores[gene] = min_score
+            continue
+        ratio = level / ref
+        if not ratio > 0:  # underflow to 0.0, or inf/inf → NaN: math.log would fail
+            scores[gene] = min_score
+            continue
+        scores[gene] = max(min(factor * math.log(ratio), max_score), min_score)
+    return scores
+
+
+def _score_node(node, gene_scores: Mapping[str, float], iso, cplx) -> float | None:
+    """Score one node of a parsed GPR expression, recursively.
+
+    A name node yields that gene's score, or ``None`` when ``gene_scores`` has no entry
+    for it. A boolean node scores its operands, discards the ``None`` ones, and
+    combines the rest with ``iso`` for ``or`` and ``cplx`` for ``and``; it yields
+    ``None`` when no operand had a score. Any other node type yields ``None``.
+    """
+    if isinstance(node, ast.Name):
+        return gene_scores.get(node.id)
+    if not isinstance(node, ast.BoolOp):
+        return None
+    scored = []
+    for operand in node.values:
+        operand_score = _score_node(operand, gene_scores, iso, cplx)
+        if operand_score is not None:
+            scored.append(operand_score)
+    if not scored:
+        return None
+    combine = iso if isinstance(node.op, ast.Or) else cplx
+    return combine(scored)
+
+
+def score_reactions_from_genes(
+    model: cobra.Model,
+    gene_scores: Mapping[str, float],
+    *,
+    isozyme_scoring: str = "max",
+    complex_scoring: str = "min",
+    no_gene_score: float = -2.0,
+) -> dict[str, float]:
+    """Score every reaction of ``model`` from per-gene scores by walking its GPR.
+
+    ``isozyme_scoring`` combines OR-joined genes and ``complex_scoring`` AND-joined
+    ones; both must be keys of ``_AGG``, otherwise ``ValueError`` is raised. A reaction
+    without a GPR body, without genes, or whose genes all lack a score receives
+    ``no_gene_score``. Returns ``{reaction_id: score}`` with float values.
+    """
+    chosen = {"isozyme_scoring": isozyme_scoring, "complex_scoring": complex_scoring}
+    for name, value in chosen.items():
+        if value not in _AGG:
+            raise ValueError(f"{name} must be one of {sorted(_AGG)}; got {value!r}.")
+    iso = _AGG[isozyme_scoring]
+    cplx = _AGG[complex_scoring]
+
+    scores: dict[str, float] = {}
+    for rxn in model.reactions:
+        body = rxn.gpr.body
+        value = None
+        if body is not None and rxn.genes:
+            value = _score_node(body, gene_scores, iso, cplx)
+        scores[rxn.id] = no_gene_score if value is None else float(value)
+    return scores
diff --git a/src/raven_python/init/steps.py b/src/raven_python/init/steps.py
new file mode 100644
index 0000000..d8a7b86
--- /dev/null
+++ b/src/raven_python/init/steps.py
@@ -0,0 +1,62 @@
+"""ftINIT step schedule.
+
+ftINIT runs as a short sequence of MILP steps instead of one big MILP. Each step
+(:class:`InitStep`) chooses which reaction categories to hold out of the problem
+(``ignore_mask``, an 8-bit pattern over :class:`raven_python.init.ReactionMasks`), whether
+to drop positive reversibles and allow metabolite secretion, and how to treat the
+reactions turned on by previous steps (``'ignore'`` for the first step, ``'essential'``
+to fix them on). :func:`get_init_steps` builds the standard schedules.
+
+The default ``'1+1'`` is two steps: step 1 holds out every specific GPR-less category
+(all mask bits but the catch-all "no GPR" one); step 2 brings the GPR-less transport /
+extracellular reactions in with step-1 reactions fixed as essential. ``'full'`` is the
+single-MILP classic-tINIT variant (nothing held out).
+"""
+from __future__ import annotations
+
+from collections.abc import Sequence
+from dataclasses import dataclass, field
+
+# 8-bit ignore patterns, one flag per ReactionMasks category in this order: exchange,
+# import, simple-transp, adv-transp, spontaneous, extracellular, custom, no-GPR.
+# A 1 marks the category as held out of the MILP step (see ReactionMasks.ignored).
+_ALL_NO_GPR_KEPT = (1, 1, 1, 1, 1, 1, 1, 0)  # hold out every GPR-less category but "all no-GPR"
+_EXCH_SPONT = (1, 0, 0, 0, 1, 0, 0, 0)        # hold out only exchange + spontaneous
+_NONE = (0, 0, 0, 0, 0, 0, 0, 0)              # hold out nothing
+
+
+@dataclass
+class InitStep:
+    """One ftINIT MILP step.
+
+    A plain settings record: how reactions switched on by earlier steps are treated,
+    which reaction categories are held out (an 8-flag pattern, by default
+    ``_ALL_NO_GPR_KEPT``), and the per-step relaxations. Instances for the standard
+    schedules are produced by :func:`get_init_steps`.
+    """
+
+    how_to_use_prev: str = "essential"          # 'ignore' | 'essential'
+    ignore_mask: tuple[int, ...] = _ALL_NO_GPR_KEPT
+    pos_rev_off: bool = False                    # drop positive reversibles from the problem
+    allow_met_secr: bool = False                 # relax S·v = 0 to ≥ 0
+    mets_to_ignore: Sequence[str] = field(default_factory=tuple)  # met names zeroed from S (e.g. H2O)
+
+
+def get_init_steps(series: str = "1+1", *, mets_to_ignore: Sequence[str] = ()) -> list[InitStep]:
+    """Build the list of steps for a named ftINIT ``series`` (RAVEN ``getINITSteps``).
+
+    Recognised names: ``'1+1'`` (the default; steps 1 and 2 merged into one),
+    ``'2+1'`` (three steps), ``'1+0'`` / ``'2+0'`` (the same without the final
+    GPR-less step) and ``'full'`` (a single MILP with nothing held out). Any other
+    name raises ``ValueError``. ``mets_to_ignore`` — metabolite names taken out of the
+    stoichiometry, e.g. H2O, H+ — is stored as a tuple on every step returned.
+    """
+    ignored_mets = tuple(mets_to_ignore)
+
+    def opening(**flags) -> InitStep:
+        # First step of a staged series: nothing is inherited from earlier steps.
+        return InitStep("ignore", _ALL_NO_GPR_KEPT, mets_to_ignore=ignored_mets, **flags)
+
+    def follow_up(mask) -> InitStep:
+        # Later step: reactions switched on so far are fixed as essential.
+        return InitStep("essential", mask, mets_to_ignore=ignored_mets)
+
+    match series:
+        case "1+1":
+            return [opening(), follow_up(_EXCH_SPONT)]
+        case "2+1":
+            return [opening(pos_rev_off=True, allow_met_secr=True),
+                    follow_up(_ALL_NO_GPR_KEPT), follow_up(_EXCH_SPONT)]
+        case "1+0":
+            return [opening()]
+        case "2+0":
+            return [opening(pos_rev_off=True, allow_met_secr=True),
+                    follow_up(_ALL_NO_GPR_KEPT)]
+        case "full":
+            return [InitStep("ignore", _NONE, mets_to_ignore=ignored_mets)]
+    raise ValueError(f"Unknown ftINIT series {series!r}; expected 1+1, 2+1, 1+0, 2+0, full.")
diff --git a/src/raven_python/init/taskfill.py b/src/raven_python/init/taskfill.py
new file mode 100644
index 0000000..58501ce
--- /dev/null
+++ b/src/raven_python/init/taskfill.py
@@ -0,0 +1,183 @@
+"""Task gap-filling for ftINIT.
+
+After ftINIT extracts a context-specific model, some metabolic tasks may no longer be
+feasible (the scoring removed reactions a task needs). :func:`fill_tasks` restores
+feasibility by adding back the **minimum-cost** set of reactions from the reference
+(template) model — cost = ``−score``, so high-scoring reactions are preferred — one
+task at a time, only for tasks that are actually infeasible (a cheap LP check gates
+the expensive MILP), accumulating additions across tasks.
+
+This is a different MILP from ftINIT's main extraction: it *adds* reactions to satisfy
+the task's ranged metabolite bounds (RAVEN's two-column ``b``), rather than selecting
+which to keep by expression score. Exchange reactions are not used to fill gaps (task
+inputs/outputs come from the task's ``b``), so they are excluded as candidates.
+"""
+from __future__ import annotations
+
+from collections.abc import Iterable, Mapping
+from dataclasses import dataclass
+
+import cobra
+from optlang.symbolics import Real, add, mul
+
+from raven_python.tasks import Task
+from raven_python.tasks.check import (
+    _metabolite_bounds,
+    _set_constraint_bounds,
+    apply_task_constraints,
+    task_name_maps,
+)
+
+# Scores feeding the gap-fill cost (cost = −score, per the module docstring).
+_DEFAULT_SCORE = -1.0   # RAVEN: missing scores default to -1 (cost 1)
+_MAX_SCORE = -0.1       # RAVEN min(score, -0.1): every added reaction costs ≥ 0.1
+
+
+@dataclass
+class TaskFillResult:
+    """Result of :func:`fill_tasks`: the gap-filled model and what was added."""
+
+    # The gap-filled model.
+    model: cobra.Model
+    # Ids of the reactions that were added back.
+    added_reactions: list[str]
+    # NOTE(review): presumably the ids of tasks that could not be made feasible —
+    # confirm against fill_tasks.
+    failed_tasks: list[str]
+
+
+def _closed_copy(model: cobra.Model) -> cobra.Model:
+    """Copy ``model`` and shut every boundary reaction in the copy (bounds ``(0, 0)``).
+
+    With the boundaries closed, task inputs/outputs can come only from the task's b.
+    The model passed in is left as it was.
+    """
+    closed = model.copy()
+    for boundary_rxn in closed.boundary:
+        boundary_rxn.bounds = (0.0, 0.0)
+    return closed
+
+
+def _feasible(model: cobra.Model, task: Task, name_to_id, comp_to_ids) -> bool:
+    """Check whether ``task`` can be met by ``model`` with its boundary reactions closed.
+
+    The check runs on ``model`` itself rather than on a copy — copying the
+    (genome-scale) model for every task dominated gap-fill runtime. Two mechanisms
+    undo the temporary changes: the ``with model:`` block reverts the closed
+    boundaries and whatever ``apply_task_constraints`` did through cobra's API, while
+    the direct, untracked edits to the metabolite mass-balance bounds are snapshotted
+    up front and written back afterwards (as in check_tasks).
+
+    Returns ``False`` when the task names metabolites that cannot be resolved, when
+    its constraints cannot be applied, or when the solver status is not ``"optimal"``.
+    """
+    met_bounds, unresolved = _metabolite_bounds(task, name_to_id, comp_to_ids)
+    if unresolved:
+        return False
+
+    snapshot = {}
+    for met_id in met_bounds:
+        balance = model.constraints[met_id]
+        snapshot[met_id] = (balance.lb, balance.ub)
+
+    solved = False
+    try:
+        with model:
+            for boundary_rxn in model.boundary:
+                boundary_rxn.bounds = (0.0, 0.0)
+            _, problem = apply_task_constraints(model, task, name_to_id, comp_to_ids)
+            if problem is None:
+                model.slim_optimize()
+                solved = model.solver.status == "optimal"
+    finally:
+        # Runs on every exit path, including exceptions.
+        for met_id, (lower, upper) in snapshot.items():
+            _set_constraint_bounds(model.constraints[met_id], lower, upper)
+    return solved
+
+
+def _fill_one_task(
+    model: cobra.Model, candidates: list[cobra.Reaction], task: Task,
+    costs: dict[str, float], *, mip_gap: float | None = None, time_limit: float | None = None,
+) -> list[str]:
+    """Min-cost set of ``candidates`` to make ``task`` feasible in ``model`` (the MILP).
+
+    ``mip_gap``/``time_limit`` bound this MILP (it has a binary per candidate reaction —
+    thousands). Unbounded, proving min-cost optimality is intractable when degraded input
+    has broken many tasks at once; a near-optimal fill (slightly more reactions) is the
+    right trade for tractability, exactly as for the main ftINIT MILP.
+
+    ``model`` is not modified: the MILP is built on a boundary-closed copy to which
+    copies of ``candidates`` are added. ``costs`` must hold an entry for every
+    candidate id; it is the objective coefficient of that candidate's on/off binary.
+    Returns the ids of the candidates whose binary is above 0.5 in the solution.
+
+    Raises ``RuntimeError`` when ``candidates`` is empty, when the task constraints
+    cannot be applied to the combined model, or when the solve yields no usable
+    solution.
+    """
+    if not candidates:  # nothing left to add → task cannot be made feasible
+        raise RuntimeError(f"gap-filling found no candidates for task {task.id!r}.")
+    combined = _closed_copy(model)  # task I/O via the task's b, not the model's exchanges
+    combined.add_reactions([r.copy() for r in candidates])
+    # Name maps are rebuilt on the combined model before the task is applied to it.
+    name_to_id, comp_to_ids = task_name_maps(combined)
+    _, error = apply_task_constraints(combined, task, name_to_id, comp_to_ids)
+    if error is not None:
+        raise RuntimeError(f"task {task.id!r} could not be applied to the reference: {error}")
+
+    prob = combined.problem
+    extras = []
+    objective_terms = []
+    for cand in candidates:
+        rxn = combined.reactions.get_by_id(cand.id)
+        y = prob.Variable(f"_fill_{cand.id}", type="binary")
+        # off ⇒ no flux; on ⇒ the reaction's own bounds apply.
+        # Encoded as  lower_bound·y ≤ flux ≤ upper_bound·y.
+        extras += [
+            y,
+            prob.Constraint(rxn.flux_expression - rxn.upper_bound * y, ub=0.0,
+                            name=f"_fillub_{cand.id}"),
+            prob.Constraint(rxn.flux_expression - rxn.lower_bound * y, lb=0.0,
+                            name=f"_filllb_{cand.id}"),
+        ]
+        objective_terms.append(mul([Real(costs[cand.id]), y]))
+    combined.add_cons_vars(extras)
+    # add() over a flat list, not Python sum() — the latter is O(n²) in sympy and with
+    # thousands of candidates dominates gap-fill runtime (see ftINIT/tINIT, same fix).
+    combined.objective = prob.Objective(add(objective_terms), direction="min")
+    if time_limit is not None:
+        # int() truncates: a fractional limit is rounded down to whole units.
+        combined.solver.configuration.timeout = int(time_limit)
+    if mip_gap is not None:
+        try:  # Gurobi-specific; harmless if the backend differs
+            combined.solver.problem.Params.MIPGap = mip_gap
+        except Exception:  # noqa: BLE001
+            pass
+    combined.slim_optimize()
+    # Accept a near-optimal incumbent (mip_gap/time_limit); only a truly infeasible fill
+    # (no incumbent) means the task cannot be satisfied from the reference.
+    # The first candidate's binary is probed: a ``None`` primal means no solution values.
+    if combined.solver.status not in ("optimal", "feasible", "suboptimal", "time_limit") or \
+            combined.variables[f"_fill_{candidates[0].id}"].primal is None:
+        raise RuntimeError(f"gap-filling found no way to make task {task.id!r} feasible.")
+    # ``or 0.0`` guards individual ``None`` primals; > 0.5 reads the binary as "on".
+    return [c.id for c in candidates
+            if (combined.variables[f"_fill_{c.id}"].primal or 0.0) > 0.5]
+
+
+def fill_tasks(
+    model: cobra.Model,
+    reference_model: cobra.Model,
+    tasks: Iterable[Task],
+    *,
+    rxn_scores: Mapping[str, float] | None = None,
+    mip_gap: float | None = None,
+    time_limit: float | None = None,
+) -> TaskFillResult:
+    """Gap-fill ``model`` from ``reference_model`` until every task is feasible.
+
+    Candidates are the reference reactions not already in ``model`` (by id), boundary
+    reactions excluded; adding one costs ``−min(score, −0.1)`` with the score taken from
+    ``rxn_scores`` (missing → cost 1). ``should_fail`` tasks are ignored, already-feasible
+    tasks skipped. Additions accumulate on a copy of ``model``, so later tasks see earlier
+    ones. Returns a ``TaskFillResult`` of that copy, the added reaction ids and the ids of
+    the tasks that could not be filled.
+
+    Boundary reactions are closed while each task is tested/solved, so task I/O comes
+    only from the task's ranged metabolite bounds (RAVEN gap-fills the exchange-free
+    model). The returned model keeps its boundary reactions.
+    """
+    score_by_id = dict(rxn_scores or {})
+    tasks = list(tasks)
+    existing_ids = {rxn.id for rxn in model.reactions}
+    candidates = [rxn for rxn in reference_model.reactions
+                  if not (rxn.id in existing_ids or rxn.boundary)]
+    costs = {}
+    for rxn in candidates:
+        costs[rxn.id] = -min(score_by_id.get(rxn.id, _DEFAULT_SCORE), _MAX_SCORE)
+
+    out = model.copy()
+    added: list[str] = []
+    failed: list[str] = []
+    for task in (t for t in tasks if not t.should_fail):
+        name_to_id, comp_to_ids = task_name_maps(out)
+        if _feasible(out, task, name_to_id, comp_to_ids):
+            continue
+        # Offer only the candidates the growing model still lacks.
+        already_there = {rxn.id for rxn in out.reactions}
+        avail = [rxn for rxn in candidates if rxn.id not in already_there]
+        try:
+            chosen = _fill_one_task(out, avail, task, costs, mip_gap=mip_gap, time_limit=time_limit)
+        except RuntimeError:
+            failed.append(task.id)
+            continue
+        if chosen:
+            out.add_reactions([reference_model.reactions.get_by_id(c).copy() for c in chosen])
+            added.extend(chosen)
+    return TaskFillResult(out, added, failed)
diff --git a/src/raven_python/io/__init__.py b/src/raven_python/io/__init__.py
new file mode 100644
index 0000000..bc70511
--- /dev/null
+++ b/src/raven_python/io/__init__.py
@@ -0,0 +1,15 @@
+"""RAVEN-specific I/O: YAML (cobra + Metabolic Atlas / Human-GEM extensions), SIF,
+Excel export, and the Standard-GEM ``model/<fmt>/…`` git layout.
+"""
+from raven_python.io.excel import export_to_excel
+from raven_python.io.git import export_for_git
+from raven_python.io.sif import export_model_to_sif
+from raven_python.io.yaml import read_yaml_model, write_yaml_model
+
+__all__ = [
+    "export_for_git",
+    "export_model_to_sif",
+    "export_to_excel",
+    "read_yaml_model",
+    "write_yaml_model",
+]
diff --git a/src/raven_python/io/excel.py b/src/raven_python/io/excel.py
new file mode 100644
index 0000000..cf6196e
--- /dev/null
+++ b/src/raven_python/io/excel.py
@@ -0,0 +1,136 @@
+"""Export a model to the RAVEN Microsoft Excel format.
+
+Writes the five-sheet RAVEN xlsx layout — RXNS, METS, COMPS, GENES, MODEL — pulling
+RAVEN-specific values back out of cobra's ``annotation`` / ``notes`` (where the
+raven_python YAML reader stashes them). Excel *import* is intentionally not provided.
+
+Requires the optional ``openpyxl`` dependency (``pip install raven_python[excel]``).
+"""
+from __future__ import annotations
+
+from pathlib import Path
+
+import cobra
+
+
+def _miriam_string(annotation: dict, exclude: tuple[str, ...] = ()) -> str:
+    """Build the RAVEN MIRIAM cell ``namespace/id;namespace/id2;...``.
+
+    Namespaces are emitted in sorted order; those listed in ``exclude`` are skipped.
+    """
+    kept = (namespace for namespace in sorted(annotation) if namespace not in exclude)
+    tokens: list[str] = []
+    for namespace in kept:
+        ids = annotation[namespace]
+        tokens += [f"{namespace}/{value}" for value in ([ids] if isinstance(ids, str) else ids)]
+    return ";".join(tokens)
+
+
+def _equation(rxn: cobra.Reaction) -> str:
+    """Render ``rxn`` as ``coef name[comp] + ... => ...``, using ``<=>`` when reversible."""
+    consumed: list[str] = []
+    produced: list[str] = []
+    for met, coef in rxn.metabolites.items():
+        term = f"{abs(coef):g} {met.name}[{met.compartment}]"
+        if coef < 0:
+            consumed.append(term)
+        elif coef > 0:
+            produced.append(term)
+    connector = " <=> " if rxn.reversibility else " => "
+    return " + ".join(consumed) + connector + " + ".join(produced)
+
+
+def _ec_codes(rxn: cobra.Reaction) -> str:
+    """``;``-joined EC numbers from ``rxn.annotation["ec-code"]`` (empty string when absent)."""
+    found = rxn.annotation.get("ec-code", [])
+    as_list = [found] if isinstance(found, str) else found
+    return ";".join(as_list)
+
+
+def export_to_excel(
+    model: cobra.Model, path: str | Path, *, sort_ids: bool = False
+) -> None:
+    """Save ``model`` as a RAVEN-layout ``.xlsx`` workbook at ``path``.
+
+    Sheets: RXNS, METS, COMPS, GENES (only when the model has genes) and MODEL. With
+    ``sort_ids=True`` reactions, metabolites, genes and compartments are written in
+    alphabetical ID order; ``model`` itself is not modified.
+    """
+    try:
+        from openpyxl import Workbook
+    except ImportError as exc:  # pragma: no cover - exercised only without openpyxl
+        raise ImportError(
+            "export_to_excel requires openpyxl. Install it with "
+            "`pip install raven_python[excel]` (or `pip install openpyxl`)."
+        ) from exc
+
+    def in_order(items):
+        return sorted(items, key=lambda obj: obj.id) if sort_ids else list(items)
+
+    reactions, metabolites, genes = map(in_order, (model.reactions, model.metabolites, model.genes))
+    metadata = dict(model.notes.get("metaData", {})) if model.notes else {}
+
+    book = Workbook()
+    book.remove(book.active)  # discard the default empty sheet
+
+    def new_sheet(title, header):
+        sheet = book.create_sheet(title)
+        sheet.append(header)
+        return sheet
+
+    # --- RXNS ---
+    rxn_sheet = new_sheet("RXNS", [
+        "#", "ID", "NAME", "EQUATION", "EC-NUMBER", "GENE ASSOCIATION", "LOWER BOUND",
+        "UPPER BOUND", "OBJECTIVE", "COMPARTMENT", "MIRIAM", "SUBSYSTEM",
+        "REPLACEMENT ID", "NOTE", "REFERENCE", "CONFIDENCE SCORE",
+    ])
+    for rxn in reactions:
+        pathway = rxn.subsystem
+        if isinstance(pathway, (list, tuple)):
+            pathway = ";".join(pathway)
+        rxn_sheet.append([
+            None, rxn.id, rxn.name, _equation(rxn), _ec_codes(rxn), rxn.gene_reaction_rule,
+            rxn.lower_bound, rxn.upper_bound,
+            rxn.objective_coefficient or None, None,
+            _miriam_string(rxn.annotation, exclude=("ec-code",)), pathway, None,
+            rxn.notes.get("note"), rxn.notes.get("references"), rxn.notes.get("confidence_score"),
+        ])
+
+    # --- METS: ID is name[compartment]; the model's own id goes to REPLACEMENT ID ---
+    met_sheet = new_sheet("METS", [
+        "#", "ID", "NAME", "UNCONSTRAINED", "MIRIAM", "COMPOSITION", "InChI",
+        "COMPARTMENT", "REPLACEMENT ID", "CHARGE",
+    ])
+    for m in metabolites:
+        inchi = m.notes.get("inchis")
+        composition = None if inchi else m.formula  # formula only when there is no InChI
+        met_sheet.append([
+            None, f"{m.name}[{m.compartment}]", m.name, None,
+            _miriam_string(m.annotation, exclude=("smiles",)),
+            composition, inchi, m.compartment, m.id, m.charge,
+        ])
+
+    # --- COMPS ---
+    comp_sheet = new_sheet("COMPS", ["#", "ABBREVIATION", "NAME", "INSIDE", "MIRIAM"])
+    comp_ids = sorted(model.compartments) if sort_ids else list(model.compartments)
+    for comp_id in comp_ids:
+        comp_sheet.append([None, comp_id, model.compartments.get(comp_id, ""), None, None])
+
+    # --- GENES (sheet omitted when the model has no genes) ---
+    if genes:
+        gene_sheet = new_sheet("GENES", ["#", "NAME", "MIRIAM", "SHORT NAME", "COMPARTMENT"])
+        for gene in genes:
+            gene_sheet.append([None, gene.id, _miriam_string(gene.annotation), gene.name, None])
+
+    # --- MODEL: a single row of model-level metadata ---
+    model_sheet = new_sheet("MODEL", [
+        "#", "ID", "NAME", "TAXONOMY", "DEFAULT LOWER", "DEFAULT UPPER",
+        "CONTACT GIVEN NAME", "CONTACT FAMILY NAME", "CONTACT EMAIL",
+        "ORGANIZATION", "NOTES",
+    ])
+    meta_keys = ("taxonomy", "defaultLB", "defaultUB", "givenName", "familyName", "email",
+                 "organization", "note")
+    identity = [None, model.id or "blankID", model.name or "blankName"]
+    model_sheet.append(identity + [metadata.get(key) for key in meta_keys])
+
+    book.save(str(path))
diff --git a/src/raven_python/io/git.py b/src/raven_python/io/git.py
new file mode 100644
index 0000000..80bf8e8
--- /dev/null
+++ b/src/raven_python/io/git.py
@@ -0,0 +1,106 @@
+"""Export a model into a Standard-GEM versioned-repository layout.
+
+Writes the model in several formats into the Standard-GEM folder structure (a
+``model/`` directory with one subfolder per format), ready to commit to a
+Git-maintained model repository (Metabolic Atlas / Human-GEM / yeast-GEM style),
+plus a ``dependencies.txt`` recording tool versions.
+
+Thin orchestration over the writers raven_python already exposes: ``write_yaml_model``,
+cobra's ``write_sbml_model`` and ``save_matlab_model``, ``export_to_excel``, plus a
+single-file reaction table (txt).
+"""
+from __future__ import annotations
+
+import importlib.metadata as _md
+import platform
+from collections.abc import Iterable
+from pathlib import Path
+
+import cobra
+
+from raven_python.io.excel import _equation, export_to_excel
+from raven_python.io.yaml import write_yaml_model
+from raven_python.utils.sort import sort_identifiers
+
+_ALL_FORMATS = ("yml", "xml", "mat", "xlsx", "txt")
+
+
+def _version(package: str) -> str:  # installed version of ``package``, or "unknown"
+    try:
+        return _md.version(package)
+    except _md.PackageNotFoundError:  # no installed distribution metadata for ``package``
+        return "unknown"
+
+
+def _write_txt(model: cobra.Model, path: Path) -> None:
+    """Write a header plus one tab-separated row per reaction (RAVEN exportForGit txt)."""
+    with open(path, "w", encoding="utf-8") as out:
+        out.write("Rxn name\tFormula\tGene-reaction association\tLB\tUB\tObjective\n")
+        out.writelines(
+            f"{r.id}\t{_equation(r)}\t{r.gene_reaction_rule}\t"
+            f"{r.lower_bound:g}\t{r.upper_bound:g}\t{r.objective_coefficient:g}\n"
+            for r in model.reactions
+        )
+
+
+def export_for_git(
+    model: cobra.Model,
+    path: str | Path = ".",
+    *,
+    prefix: str = "model",
+    formats: Iterable[str] = ("yml", "xml", "mat", "xlsx"),
+    sub_dirs: bool = True,
+) -> Path:
+    """Export ``model`` in several formats, laid out as a Standard-GEM repository.
+
+    An identifier-sorted copy is written, so the caller's model is left untouched;
+    a ``dependencies.txt`` with tool versions is written into the root directory.
+
+    Parameters
+    ----------
+    path
+        Target directory (created if missing).
+    prefix
+        File stem shared by all formats (default ``"model"``).
+    formats
+        Subset of ``"yml"``, ``"xml"``, ``"mat"``, ``"xlsx"``, ``"txt"``; anything
+        else raises ``ValueError``. Default: all but ``"txt"``.
+    sub_dirs
+        True (default): files go to ``<path>/model/<fmt>/<prefix>.<fmt>``;
+        False: every file is written directly into ``path``.
+
+    Returns the root directory written to, as a ``pathlib.Path``.
+    """
+    formats = list(formats)
+    unknown = {fmt for fmt in formats if fmt not in _ALL_FORMATS}
+    if unknown:
+        raise ValueError(f"Unknown format(s): {sorted(unknown)}; allowed: {_ALL_FORMATS}")
+
+    # Everything below writes a sorted copy, so the caller's model is untouched.
+    ordered = sort_identifiers(model.copy())
+
+    root = Path(path) / "model" if sub_dirs else Path(path)
+    root.mkdir(parents=True, exist_ok=True)
+
+    writers = {
+        "yml": lambda dest: write_yaml_model(ordered, dest),
+        "xml": lambda dest: cobra.io.write_sbml_model(ordered, str(dest)),
+        "mat": lambda dest: cobra.io.save_matlab_model(ordered, str(dest)),
+        "xlsx": lambda dest: export_to_excel(ordered, dest),
+        "txt": lambda dest: _write_txt(ordered, dest),
+    }
+    for fmt, write in writers.items():
+        if fmt not in formats:
+            continue
+        folder = root / fmt if sub_dirs else root
+        folder.mkdir(parents=True, exist_ok=True)
+        write(folder / f"{prefix}.{fmt}")
+
+    with open(root / "dependencies.txt", "w", encoding="utf-8") as fh:
+        fh.writelines(f"{name}\t{found}\n" for name, found in (
+            ("python", platform.python_version()),
+            ("cobra", _version("cobra")),
+            ("raven_python", _version("raven_python")),
+        ))
+
+    return root
diff --git a/src/raven_python/io/sif.py b/src/raven_python/io/sif.py
new file mode 100644
index 0000000..9e73efa
--- /dev/null
+++ b/src/raven_python/io/sif.py
@@ -0,0 +1,96 @@
+"""Export a model to Cytoscape SIF (Simple Interaction Format).
+
+Three graph types are supported:
+
+* ``"rc"`` reaction–compound: each reaction linked to its metabolites;
+* ``"rr"`` reaction–reaction: reactions linked when they share a metabolite;
+* ``"cc"`` compound–compound: each substrate linked to the products of the
+  reactions it feeds (computed on an irreversible copy, as RAVEN does, to avoid
+  spurious double links from reversible reactions).
+
+A SIF line is ``source <tab> graph_type <tab> target1 <tab> target2 ...``.
+"""
+from __future__ import annotations
+
+import warnings
+from collections import Counter
+from collections.abc import Mapping
+from pathlib import Path
+
+import cobra
+
+from raven_python.manipulation.irreversible import convert_to_irreversible
+
+_GRAPH_TYPES = ("rc", "rr", "cc")
+
+
+def _edges(model, graph_type):
+    """Generate ``(source, [targets])`` pairs of model objects for ``graph_type``."""
+    if graph_type == "rc":
+        yield from ((rxn, list(rxn.metabolites)) for rxn in model.reactions)
+        return
+    if graph_type == "rr":
+        for rxn in model.reactions:
+            linked = {other for met in rxn.metabolites for other in met.reactions}
+            linked.discard(rxn)
+            yield rxn, list(linked)
+        return
+    # cc (any other value ends up here): work on an irreversible copy and link each
+    # metabolite to the products of the reactions that consume it.
+    irrev = model.copy()
+    convert_to_irreversible(irrev)
+    for met in irrev.metabolites:
+        made = {prod for rxn in met.reactions if rxn.get_coefficient(met) < 0
+                for prod, coef in rxn.metabolites.items() if coef > 0}
+        yield met, list(made)
+
+
+def export_model_to_sif(
+    model: cobra.Model,
+    path: str | Path,
+    graph_type: str = "rc",
+    *,
+    reaction_labels: Mapping[str, str] | None = None,
+    metabolite_labels: Mapping[str, str] | None = None,
+) -> None:
+    """Write ``model`` as a Cytoscape SIF file at ``path``.
+
+    Parameters
+    ----------
+    graph_type
+        ``"rc"`` (reaction–compound, default), ``"rr"`` (reaction–reaction) or ``"cc"``
+        (compound–compound); any other value raises ``ValueError``.
+    reaction_labels, metabolite_labels
+        Optional ``{id: label}`` overrides for node labels; unmapped nodes use their id.
+    """
+    if graph_type not in _GRAPH_TYPES:
+        raise ValueError(f"graph_type must be one of {_GRAPH_TYPES}, got {graph_type!r}")
+
+    rxn_names = reaction_labels or {}
+    met_names = metabolite_labels or {}
+
+    # Targets are de-duplicated by label, so a map that sends several ids to one label
+    # silently merges those nodes: warn about it. Only explicitly mapped ids are checked,
+    # because the default labels are the ids themselves.
+    for kind, mapping in (("reaction", rxn_names), ("metabolite", met_names)):
+        duplicates = [lab for lab, hits in Counter(mapping.values()).items() if hits > 1]
+        if not duplicates:
+            continue
+        warnings.warn(
+            f"{kind}_labels maps multiple ids to the same label(s) "
+            f"({duplicates[:5]}{'…' if len(duplicates) > 5 else ''}); "
+            "SIF nodes are keyed by label, so those nodes will collapse.",
+            stacklevel=2,
+        )
+
+    def label(obj) -> str:
+        mapping = rxn_names if isinstance(obj, cobra.Reaction) else met_names
+        return mapping.get(obj.id, obj.id)
+
+    with open(path, "w", encoding="utf-8") as handle:
+        for source, targets in _edges(model, graph_type):
+            src = label(source)
+            others = {label(node) for node in targets}
+            others.discard(src)
+            if others:
+                handle.write(f"{src}\t{graph_type}\t" + "\t".join(sorted(others)) + "\n")
diff --git a/src/raven_python/io/yaml.py b/src/raven_python/io/yaml.py
new file mode 100644
index 0000000..151954b
--- /dev/null
+++ b/src/raven_python/io/yaml.py
@@ -0,0 +1,191 @@
+"""Read and write RAVEN/cobrapy YAML models.
+
+Aligned to RAVEN ``writeYAMLmodel.m`` / ``readYAMLmodel.m`` as of the
+``feat/geckopy-compat-yaml`` work (commit fa281a1), whose writer emits **cobra's
+native ``!!omap`` YAML**. Because the format *is* cobra's, the standard model
+content — id, name, compartments, and per-entry id/name/compartment/formula/
+charge/bounds/gene_reaction_rule/objective_coefficient/subsystem/metabolites and
+the whole ``annotation`` block (which carries ``smiles`` for metabolites,
+``ec-code`` for reactions, and all MIRIAM cross-references) — is read and written
+by ``cobra.io`` directly.
+
+This module only handles what cobra drops or mishandles:
+
+* **RAVEN-only top-level per-entry keys** that cobra ignores: ``inchis``,
+  ``deltaG``, ``metFrom`` and the free-text ``notes`` (metNotes) on metabolites;
+  ``confidence_score``, ``references``, ``rxnFrom``, ``deltaG`` and ``notes``
+  (rxnNotes) on reactions; ``protein`` on genes. These are stashed in the cobra
+  object's ``.notes`` dict on read and lifted back to top-level keys on write.
+* **Model-level extras** cobra ignores: ``version``, the ``metaData`` provenance
+  block, and the GECKO sections (``gecko_light``/``ec-rxns``/``ec-enzymes``),
+  preserved on ``model.notes`` for round-tripping.
+
+The reader also accepts the older RAVEN files (id/name nested in ``metaData``).
+"""
+from __future__ import annotations
+
+import gzip
+from collections import OrderedDict
+from pathlib import Path
+
+import cobra
+from cobra.io.dict import model_from_dict, model_to_dict
+from cobra.io.yaml import yaml as _cobra_yaml  # ruamel round-trip YAML (handles !!omap)
+
+
+def _open_text(path: str | Path, mode: str):
+    """Open ``path`` in text mode as UTF-8; a ``.gz`` suffix selects ``gzip.open``."""
+    gzipped = str(path).endswith(".gz")
+    opener, text_mode = (gzip.open, f"{mode}t") if gzipped else (open, mode)
+    return opener(path, text_mode, encoding="utf-8")
+
+# Pairs of (top-level YAML key, key inside the cobra object's ``.notes`` dict) for the
+# RAVEN-only per-entry fields. RAVEN's free-text ``notes`` (metNotes/rxnNotes) is kept
+# under ``note`` so it cannot collide with the notes container itself.
+_MET_FIELDS = (
+    ("inchis", "inchis"), ("deltaG", "deltaG"), ("metFrom", "metFrom"), ("notes", "note"),
+)
+_RXN_FIELDS = (
+    ("confidence_score", "confidence_score"), ("references", "references"),
+    ("rxnFrom", "rxnFrom"), ("deltaG", "deltaG"), ("notes", "note"),
+)
+_GENE_FIELDS = (("protein", "protein"),)
+
+# Top-level keys left in the dict that is handed to cobra's ``model_from_dict``.
+_COBRA_TOP_KEYS = frozenset(("metabolites", "reactions", "genes", "compartments", "id", "name"))
+
+
+def _to_plain(obj):
+    """Recursively reduce loader output to builtin dict/list/bool/int/float/str/None."""
+    if isinstance(obj, dict):
+        return {str(k): _to_plain(v) for k, v in obj.items()}
+    if isinstance(obj, (list, tuple)):
+        return [_to_plain(v) for v in obj]
+    if isinstance(obj, bool) or obj is None:
+        return obj
+    for builtin in (int, float):  # strip scalar subclasses (e.g. loader wrapper types)
+        if isinstance(obj, builtin):
+            return builtin(obj)
+    return str(obj)  # str subclasses are flattened too; anything else is stringified
+
+
+def _capture_entry_fields(entries, fields):
+    """Pop RAVEN-only top-level keys off each entry so cobra never sees them; return one
+    notes dict per entry, aligned with ``entries``. A mapping-valued ``notes`` is unpacked
+    into that dict instead of being nested under ``note``."""
+    captured = []
+    for entry in entries:
+        notes = {}
+        for yaml_key, notes_key in fields:
+            if yaml_key in entry:
+                lifted = entry.pop(yaml_key)
+                unpack = yaml_key == "notes" and isinstance(lifted, dict)
+                notes = {**lifted, **notes} if unpack else {**notes, notes_key: lifted}
+        captured.append(notes)
+    return captured
+
+
+def read_yaml_model(path: str | Path) -> cobra.Model:
+    """Read a RAVEN/cobrapy YAML model into a ``cobra.Model``.
+
+    RAVEN-only per-entry keys are merged into each object's ``.notes``; ``metaData``,
+    ``version`` and other non-cobra top-level sections are kept on ``model.notes``.
+    """
+    with _open_text(path, "r") as handle:
+        raw = _to_plain(_cobra_yaml.load(handle))
+    if not isinstance(raw, dict):
+        raise ValueError(f"{path}: top-level YAML is a {type(raw).__name__}, not a mapping.")
+
+    metadata = raw.pop("metaData", None) or {}
+    version = raw.pop("version", None)
+    foreign = {k: raw.pop(k) for k in list(raw) if k not in _COBRA_TOP_KEYS}
+
+    met_notes = _capture_entry_fields(raw.get("metabolites", []), _MET_FIELDS)
+    rxn_notes = _capture_entry_fields(raw.get("reactions", []), _RXN_FIELDS)
+    gene_notes = _capture_entry_fields(raw.get("genes", []), _GENE_FIELDS)
+    model = model_from_dict(raw)
+
+    # Merge instead of assigning: cobra has already parsed whatever ``notes`` it was given
+    # (gene entries keep theirs), and plain assignment would silently discard them.
+    pairs = ((model.metabolites, met_notes), (model.reactions, rxn_notes),
+             (model.genes, gene_notes))
+    for objects, captured in pairs:
+        for obj, notes in zip(objects, captured, strict=False):
+            parsed = obj.notes if isinstance(obj.notes, dict) else {}
+            obj.notes = {**parsed, **notes}
+
+    # Legacy files keep id/name inside metaData; restore them if cobra found none.
+    for attr in ("id", "name"):
+        if metadata.get(attr) and not getattr(model, attr):
+            setattr(model, attr, metadata[attr])
+    extras = (("metaData", metadata or None), ("version", version),
+              ("_yaml_sections", foreign or None))
+    model.notes.update({key: value for key, value in extras if value is not None})
+
+    return model
+
+
+def _emit_entry_fields(entries, fields):
+    """Move RAVEN-only values from each entry's ``notes`` dict to top-level keys, in place.
+
+    A missing or non-dict ``notes`` is just removed. Notes not named in ``fields`` stay under
+    ``notes``; a free-text note lifted to that same key is folded back in as ``note``.
+    """
+    for entry in entries:
+        raw_notes = entry.pop("notes", None)
+        if isinstance(raw_notes, dict):
+            leftover = dict(raw_notes)
+            for yaml_key, notes_key in fields:
+                if notes_key in leftover:
+                    entry[yaml_key] = leftover.pop(notes_key)
+            if leftover and "notes" in entry:
+                leftover["note"] = entry["notes"]
+            if leftover:
+                entry["notes"] = leftover
+
+
+def write_yaml_model(
+    model: cobra.Model, path: str | Path, *, sort_ids: bool = False
+) -> None:
+    """Write a ``cobra.Model`` to RAVEN/cobrapy (``!!omap``) YAML without modifying it.
+
+    With ``sort_ids=True`` metabolites/reactions/genes/compartments are written in
+    alphabetical order (diff-friendly).
+    """
+    model_notes = dict(model.notes or {})
+    stored_meta = model_notes.pop("metaData", None) or {}
+    version = model_notes.pop("version", None)
+    foreign = dict(model_notes.pop("_yaml_sections", None) or {})  # copy: popped from below
+
+    doc = OrderedDict(_to_plain(model_to_dict(model)))
+    # model_to_dict copied all of model.notes; keep only what is not re-emitted below.
+    doc.pop("notes", None)
+    if model_notes:
+        doc["notes"] = _to_plain(model_notes)
+
+    if sort_ids:
+        for section in ("metabolites", "reactions", "genes"):
+            if section in doc:
+                doc[section] = sorted(doc[section], key=lambda e: e.get("id", ""))
+        if isinstance(doc.get("compartments"), dict):
+            doc["compartments"] = dict(sorted(doc["compartments"].items()))
+
+    _emit_entry_fields(doc.get("metabolites", []), _MET_FIELDS)
+    _emit_entry_fields(doc.get("reactions", []), _RXN_FIELDS)
+    _emit_entry_fields(doc.get("genes", []), _GENE_FIELDS)
+
+    # Append version / gecko_light / metaData / remaining sections, like RAVEN's writer.
+    if version is not None:
+        doc["version"] = version
+    metadata = dict(stored_meta)
+    for attr in ("id", "name"):
+        if getattr(model, attr):
+            metadata.setdefault(attr, getattr(model, attr))
+    if "gecko_light" in foreign:
+        doc["gecko_light"] = foreign.pop("gecko_light")
+    if metadata:
+        doc["metaData"] = metadata
+    doc.update(foreign)
+
+    with _open_text(path, "w") as handle:
+        _cobra_yaml.dump(doc, handle)
diff --git a/src/raven_python/localization/__init__.py b/src/raven_python/localization/__init__.py
new file mode 100644
index 0000000..c6071e0
--- /dev/null
+++ b/src/raven_python/localization/__init__.py
@@ -0,0 +1,27 @@
+"""Sub-cellular localisation — predictor-agnostic, partial-update friendly.
+
+:func:`predict_localization` is the MILP entry point;
+:func:`load_wolfpsort` / :func:`load_deeploc` parse predictor outputs into the
+``gene × compartment`` :class:`LocalizationScores` DataFrame the algorithm consumes.
+"""
+from raven_python.localization.predict import (
+    LocalizationProposal,
+    LocalizationResult,
+    apply_localization,
+    predict_localization,
+)
+from raven_python.localization.scores import (
+    LocalizationScores,
+    load_deeploc,
+    load_wolfpsort,
+)
+
+__all__ = [
+    "LocalizationProposal",
+    "LocalizationResult",
+    "LocalizationScores",
+    "apply_localization",
+    "load_deeploc",
+    "load_wolfpsort",
+    "predict_localization",
+]
diff --git a/src/raven_python/localization/predict.py b/src/raven_python/localization/predict.py
new file mode 100644
index 0000000..0fb8596
--- /dev/null
+++ b/src/raven_python/localization/predict.py
@@ -0,0 +1,378 @@
+"""Sub-cellular localisation by MILP.
+
+Assigns reactions to compartments by maximising per-gene localisation evidence minus
+inter-compartment transport cost. Key behaviour:
+
+* The caller passes the set of reactions to (re-)place (``reactions_to_relocate``);
+  everything else is pinned. Boundary reactions and existing inter-compartment
+  transports are always pinned even if listed.
+* Incomplete models are tolerated — no silent reaction removal for "metabolite not
+  produced". Reactions with no scored genes are reported in ``unplaced_reactions``.
+* Deterministic MILP solve (Gurobi / HiGHS / GLPK).
+* ``apply=False`` returns a :class:`LocalizationProposal` (a diff) without mutating.
+* **Multi-compartment by default.** A gene can land in several compartments — its
+  highest-scoring compartment is "free", every additional compartment costs
+  ``multi_compartment_penalty``. Secondary compartments naturally have lower predictor
+  scores (an implicit penalty) and are only picked when their score still exceeds the
+  explicit penalty. Set ``multi_compartment_penalty`` very high for effectively
+  mono-localised genes.
+
+Limitations to be aware of:
+
+* Isozyme separation is *not* applied internally — a reaction with isozymes is treated
+  as "all listed genes must share its compartment". For per-isozyme placement, call
+  :func:`raven_python.manipulation.expand_model` first.
+* Transports are routed through ``default_compartment``.
+"""
+from __future__ import annotations
+
+from collections.abc import Iterable, Mapping
+from dataclasses import dataclass, field
+
+import cobra
+import pandas as pd
+from optlang.symbolics import Real, add, mul
+
+from raven_python.localization.scores import LocalizationScores
+
+
+@dataclass
+class LocalizationProposal:
+    """What :func:`predict_localization` proposes, before applying it.
+
+    ``moved`` and ``added_transports`` hold one row per item. Request one with
+    ``apply=False`` to preview changes; pass it to :func:`apply_localization` to
+    commit it, or diff it against a curator's expectations.
+    """
+
+    moved: pd.DataFrame                       # columns: rxn_id, from_compartment, to_compartment
+    added_transports: pd.DataFrame            # columns: met_id, compartment (other than default)
+    gene_compartments: dict[str, list[str]]   # gene_id → list of compartments assigned
+    unplaced_reactions: list[str] = field(default_factory=list)  # had no scored gene support
+    objective: float = 0.0  # objective reported by the solve; stays 0.0 when nothing was solved
+
+
+@dataclass
+class LocalizationResult:
+    """Outcome of :func:`predict_localization` (when ``apply=True``)."""
+
+    model: cobra.Model  # model handed back together with the proposal
+    proposal: LocalizationProposal  # the diff that was computed for ``model``
+    added_transports: list[cobra.Reaction] = field(default_factory=list)  # transport reactions created
+
+
+# --------------------------------------------------------------------- helpers
+
+def _reaction_compartment(rxn: cobra.Reaction) -> str | None:
+    """The one compartment all of ``rxn``'s metabolites share, or ``None`` otherwise."""
+    found = {met.compartment for met in rxn.metabolites if met.compartment}
+    return found.pop() if len(found) == 1 else None
+
+
+def _reaction_genes(rxn: cobra.Reaction) -> list[str]:
+    """Ids of all genes attached to ``rxn``, as a flat list (AND/OR structure ignored)."""
+    return [gene.id for gene in rxn.genes]
+
+
+# --------------------------------------------------------------------- the MILP
+
+def predict_localization(
+    model: cobra.Model,
+    scores: LocalizationScores,
+    reactions_to_relocate: Iterable[str],
+    *,
+    default_compartment: str = "c",
+    transport_cost: float | Mapping[str, float] = 0.5,
+    multi_compartment_penalty: float = 0.5,
+    apply: bool = True,
+    mip_gap: float | None = None,
+    time_limit: float | None = None,
+) -> LocalizationResult | LocalizationProposal:
+    """Place a caller-specified set of reactions in compartments via MILP.
+
+    Returns a :class:`LocalizationProposal` (when ``apply=False``) or a
+    :class:`LocalizationResult` (when ``apply=True``) whose ``model`` is always a copy.
+
+    ``reactions_to_relocate``: the reaction ids to (re-)place. Everything else stays
+    where it is. Boundary reactions and existing multi-compartment transports passed
+    in this set are silently filtered out (always pinned). Pass an empty set or a list
+    of zero non-boundary reactions to no-op.
+
+    ``transport_cost``: either a scalar (same cost per added transport) or a mapping
+    ``{metabolite_id_base: cost}`` (where the base id strips the compartment suffix,
+    e.g. ``"glc__D"`` matches ``"glc__D_c"``/``"glc__D_e"``); full ids are looked up
+    next and unlisted metabolites cost 0.5. Negative costs *favour* the transport.
+
+    **Multi-compartment gene scoring (default behaviour):** a gene contributes its
+    predictor score in each compartment it lands in; one compartment per gene is
+    "free", each additional one costs ``multi_compartment_penalty``, so a secondary
+    compartment is only worth picking when its score exceeds the penalty.
+
+    Raises :class:`ValueError` when ``default_compartment`` is in neither the model nor
+    the scores, :class:`RuntimeError` when the solver ends without a usable status.
+    """
+    # ---- 1. Scope: which reactions move, which are pinned. -----------------
+    to_relocate = set(reactions_to_relocate)
+    # Boundaries / transports always pin (even if listed).
+    to_relocate -= {r.id for r in model.reactions
+                    if r.boundary or _reaction_compartment(r) is None}
+    if not to_relocate:
+        return _empty_result(model, apply)
+
+    # ---- 2. Compartments universe (model + scores). ------------------------
+    compartments = sorted(set(model.compartments) | set(scores.compartments))
+    if default_compartment not in compartments:
+        raise ValueError(f"default_compartment={default_compartment!r} not in known "
+                         f"compartments {compartments}")
+
+    # ---- 3. Gather genes for the relocate-set, build score lookup. ---------
+    # Genes only mentioned by pinned reactions don't enter the MILP.
+    moving = [model.reactions.get_by_id(rid) for rid in sorted(to_relocate)]
+    genes_in_scope: set[str] = set()
+    unplaced: list[str] = []
+    for r in moving:
+        gs = _reaction_genes(r)
+        scored = [g for g in gs if g in scores.df.index]
+        if not gs:
+            # GPR-less reaction: place it freely (no gene coupling). Allowed.
+            continue
+        if not scored:
+            # All genes absent from predictor → no signal; report and skip.
+            unplaced.append(r.id)
+            continue
+        genes_in_scope.update(scored)
+    unplaced_ids = set(unplaced)  # built once instead of once per reaction below
+    placeable = [r for r in moving if r.id not in unplaced_ids]
+    if not placeable:
+        # Everything in the relocate set lacks scored genes — return a proposal with only
+        # the unplaced list (paired with an untouched *copy* of the model when applying).
+        prop = LocalizationProposal(
+            moved=pd.DataFrame(columns=["rxn_id", "from_compartment", "to_compartment"]),
+            added_transports=pd.DataFrame(columns=["met_id", "compartment"]),
+            gene_compartments={}, unplaced_reactions=unplaced, objective=0.0)
+        return prop if not apply else LocalizationResult(model.copy(), prop)
+
+    # ---- 4. Per-metabolite transport cost. ---------------------------------
+    def _met_cost(m_id: str) -> float:
+        if not isinstance(transport_cost, (int, float)):
+            base = m_id.rsplit("_", 1)[0]
+            return float(transport_cost.get(base, transport_cost.get(m_id, 0.5)))
+        return float(transport_cost)
+
+    # ---- 5. Build the MILP. ------------------------------------------------
+    model.solver  # noqa: B018 — ensure the solver is initialised so model.problem works
+    prob = model.problem
+    opt = prob.Model()
+
+    # x[r, c] = 1 iff reaction r placed in c (only for r ∈ placeable)
+    x: dict[tuple[str, str], object] = {
+        (r.id, c): prob.Variable(f"x_{r.id}_{c}", type="binary")
+        for r in placeable for c in compartments
+    }
+    # y[g, c] = 1 iff gene g assigned to c
+    y: dict[tuple[str, str], object] = {
+        (g, c): prob.Variable(f"y_{g}_{c}", type="binary")
+        for g in genes_in_scope for c in compartments
+    }
+    # t[m_id, c] = 1 iff metabolite m (with id including its current compartment suffix)
+    # needs a transport to compartment c (c ≠ default). One per (metabolite id, c).
+    met_keys: set[tuple[str, str]] = set()
+    for r in placeable:
+        for m in r.metabolites:
+            for c in compartments:
+                if c != default_compartment:
+                    met_keys.add((m.id, c))
+    t: dict[tuple[str, str], object] = {
+        k: prob.Variable(f"t_{k[0]}_{k[1]}", type="binary") for k in met_keys
+    }
+
+    cons: list = []
+    # 5a. Each placeable reaction goes to exactly one compartment.
+    for r in placeable:
+        cons.append(prob.Constraint(add([mul([Real(1.0), x[r.id, c]]) for c in compartments]),
+                                     lb=1.0, ub=1.0, name=f"place_{r.id}"))
+    # 5b. Gene-reaction coupling: if r placed in c, every scored gene of r must be in c.
+    for r in placeable:
+        for g in _reaction_genes(r):
+            if g not in genes_in_scope:
+                continue
+            for c in compartments:
+                # x[r,c] − y[g,c] ≤ 0
+                cons.append(prob.Constraint(x[r.id, c] - y[g, c], ub=0.0,
+                                             name=f"gene_{r.id}_{g}_{c}"))
+    # 5c. Gene assignment: each gene in scope lands in ≥1 compartment. Multi is allowed;
+    # the multi_compartment_penalty in the objective keeps extras from coming for free.
+    for g in genes_in_scope:
+        s = add([mul([Real(1.0), y[g, c]]) for c in compartments])
+        cons.append(prob.Constraint(s, lb=1.0, name=f"gene_one_{g}"))
+    # 5d. Transport requirement: t[m,c] ≥ x[r,c] whenever r touches m and c ≠ default.
+    for r in placeable:
+        for m in r.metabolites:
+            for c in compartments:
+                if c == default_compartment:
+                    continue
+                # x[r,c] − t[m,c] ≤ 0
+                cons.append(prob.Constraint(x[r.id, c] - t[m.id, c], ub=0.0,
+                                             name=f"trans_{r.id}_{m.id}_{c}"))
+
+    opt.add(list(x.values()) + list(y.values()) + list(t.values()) + cons)
+
+    # 5e. Objective.
+    obj_terms = []
+    # + per-gene per-compartment localisation score (rows missing → 0)
+    score_lookup = scores.df  # gene_id × compartment → float
+    for g in genes_in_scope:
+        for c in compartments:
+            s = float(score_lookup.at[g, c]) if c in score_lookup.columns and not pd.isna(score_lookup.at[g, c]) else 0.0
+            if s:
+                obj_terms.append(mul([Real(s), y[g, c]]))
+    # − transport cost per added transport
+    for (m_id, _c), tvar in t.items():
+        cost = _met_cost(m_id)
+        if cost:
+            obj_terms.append(mul([Real(-cost), tvar]))
+    # − multi-compartment penalty per *extra* compartment (the primary is free).
+    # Per gene: penalty * (Σ_c y[g,c] - 1) = penalty * Σ_c y[g,c] - penalty (constant). The
+    # constant doesn't affect optimisation but is added back to the reported objective so
+    # the value matches the "primary free" intent the user reads off the proposal.
+    constant_offset = 0.0
+    if multi_compartment_penalty:
+        for yvar in y.values():
+            obj_terms.append(mul([Real(-multi_compartment_penalty), yvar]))
+        constant_offset = multi_compartment_penalty * len(genes_in_scope)
+
+    opt.objective = prob.Objective(add(obj_terms) if obj_terms else Real(0.0), direction="max")
+    if time_limit is not None:
+        opt.configuration.timeout = int(time_limit)
+    if mip_gap is not None:
+        try:  # Gurobi-specific
+            opt.problem.Params.MIPGap = mip_gap
+        except Exception:  # noqa: BLE001
+            pass
+
+    opt.optimize()
+    if opt.status not in ("optimal", "feasible", "suboptimal", "time_limit"):
+        raise RuntimeError(f"localisation MILP did not solve (status: {opt.status}).")
+
+    # ---- 6. Read the solution into a proposal. -----------------------------
+    moved_rows: list[dict] = []
+    for r in placeable:
+        chosen = None
+        for c in compartments:
+            if (x[r.id, c].primal or 0.0) >= 0.5:
+                chosen = c
+                break
+        from_c = _reaction_compartment(r)
+        if chosen and chosen != from_c:
+            moved_rows.append({"rxn_id": r.id, "from_compartment": from_c,
+                                "to_compartment": chosen})
+    moved = pd.DataFrame(moved_rows, columns=["rxn_id", "from_compartment", "to_compartment"])
+
+    transp_rows: list[dict] = []
+    for (m_id, c), tvar in t.items():
+        if (tvar.primal or 0.0) >= 0.5:
+            transp_rows.append({"met_id": m_id, "compartment": c})
+    added_transports = pd.DataFrame(transp_rows, columns=["met_id", "compartment"])
+
+    gene_comps: dict[str, list[str]] = {}
+    for g in genes_in_scope:
+        in_c = [c for c in compartments if (y[g, c].primal or 0.0) >= 0.5]
+        gene_comps[g] = in_c
+
+    proposal = LocalizationProposal(
+        moved=moved, added_transports=added_transports, gene_compartments=gene_comps,
+        unplaced_reactions=unplaced,
+        objective=float((opt.objective.value or 0.0) + constant_offset))
+
+    if not apply:
+        return proposal
+    new_model, transports = apply_localization(model, proposal, default_compartment=default_compartment)
+    return LocalizationResult(model=new_model, proposal=proposal, added_transports=transports)
+
+
+# --------------------------------------------------------------------- apply
+
+def apply_localization(
+    model: cobra.Model,
+    proposal: LocalizationProposal,
+    *,
+    default_compartment: str = "c",
+) -> tuple[cobra.Model, list[cobra.Reaction]]:
+    """Commit a :class:`LocalizationProposal` onto a copy of ``model``.
+
+    Works on ``model.copy()`` and returns ``(model_copy, added)``; the input model is
+    not modified. Each row of ``proposal.moved`` has its reaction's metabolites
+    swapped for their counterparts in the target compartment (``A_c → A_m``), which
+    are created when missing. Each row of ``proposal.added_transports`` becomes a
+    reversible ``M[default] ⇌ M[c]`` reaction with id ``tr_<met>_<c>`` (RAVEN
+    convention); rows that need no transport or already have one are skipped.
+    """
+    patched = model.copy()
+    created: list[cobra.Reaction] = []
+
+    # Step 1 — relocate reactions by rebuilding their stoichiometry in the target.
+    for _, move in proposal.moved.iterrows():
+        reaction = patched.reactions.get_by_id(move["rxn_id"])
+        destination = move["to_compartment"]
+        previous = list(reaction.metabolites.items())
+        # Empty the reaction before refilling it so cobra updates constraints cleanly.
+        reaction.subtract_metabolites(dict(previous))
+        relocated: dict[cobra.Metabolite, float] = {}
+        for met, coefficient in previous:
+            target_met = _met_in_compartment(patched, met, destination)
+            relocated[target_met] = coefficient
+        reaction.add_metabolites(relocated)
+
+    # Step 2 — connect the default compartment to every requested compartment.
+    for _, request in proposal.added_transports.iterrows():
+        met_id, comp = request["met_id"], request["compartment"]
+        if met_id not in patched.metabolites:
+            continue
+        origin = patched.metabolites.get_by_id(met_id)
+        if origin.compartment == comp:
+            continue  # metabolite already lives there; nothing to transport
+        near = _met_in_compartment(patched, origin, default_compartment)
+        far = _met_in_compartment(patched, origin, comp)
+        if near.id == far.id:
+            continue
+        transport_id = f"tr_{_base_met_id(origin)}_{comp}"
+        if transport_id in patched.reactions:
+            continue
+        transport = cobra.Reaction(transport_id, lower_bound=-1000, upper_bound=1000)
+        transport.add_metabolites({near: -1.0, far: 1.0})
+        transport.notes["localization"] = "added by predict_localization"
+        patched.add_reactions([transport])
+        created.append(patched.reactions.get_by_id(transport_id))
+
+    return patched, created
+
+
+def _base_met_id(m: cobra.Metabolite) -> str:
+    """Metabolite id minus its trailing ``_<compartment>`` (unchanged when absent)."""
+    suffix = f"_{m.compartment}"
+    has_suffix = m.compartment and m.id.endswith(suffix)
+    return m.id[: -len(suffix)] if has_suffix else m.id
+
+
+def _met_in_compartment(model: cobra.Model, source: cobra.Metabolite,
+                        compartment: str) -> cobra.Metabolite:
+    """Return ``source``'s twin in ``compartment``, adding it to ``model`` if absent."""
+    if source.compartment == compartment:
+        return source
+    twin_id = f"{_base_met_id(source)}_{compartment}"
+    if twin_id not in model.metabolites:
+        # No twin yet: clone the descriptive fields under the new id.
+        twin = cobra.Metabolite(twin_id, name=source.name, compartment=compartment,
+                                formula=source.formula, charge=source.charge)
+        twin.notes = dict(source.notes or {})
+        model.add_metabolites([twin])
+        return twin
+    return model.metabolites.get_by_id(twin_id)
+
+
+def _empty_result(model: cobra.Model, apply_flag: bool):
+    # No-op outcome: an empty proposal, paired with a model copy when ``apply_flag`` is set.
+    moved = pd.DataFrame(columns=["rxn_id", "from_compartment", "to_compartment"])
+    transports = pd.DataFrame(columns=["met_id", "compartment"])
+    blank = LocalizationProposal(moved=moved, added_transports=transports, gene_compartments={})
+    return LocalizationResult(model.copy(), blank) if apply_flag else blank
diff --git a/src/raven_python/localization/scores.py b/src/raven_python/localization/scores.py
new file mode 100644
index 0000000..4fabc03
--- /dev/null
+++ b/src/raven_python/localization/scores.py
@@ -0,0 +1,117 @@
+"""Loaders for gene → compartment localisation predictors (WoLF PSORT, DeepLoc, …).
+
+The localisation algorithm in :mod:`raven_python.localization.predict` consumes a
+*gene × compartment* score table (:class:`LocalizationScores`) where higher = stronger
+evidence. Each predictor produces this differently; loaders here normalise them. The
+format is open — a user can build a :class:`LocalizationScores` from any source by
+constructing the :class:`pandas.DataFrame` directly.
+
+Each loader normalises each gene's row so the best compartment is 1.0 (RAVEN's
+``parseScores`` convention), which lets transport costs be set on a comparable scale.
+"""
+from __future__ import annotations
+
+import re
+from collections.abc import Mapping
+from dataclasses import dataclass
+from pathlib import Path
+
+import pandas as pd
+
+
+@dataclass
+class LocalizationScores:
+    """Gene × compartment evidence table: ``df`` is indexed by ``gene_id`` with one
+    column per compartment id, holding floats (higher = stronger evidence).
+
+    Genes absent from ``df`` and NaN entries count as "no signal" for
+    :func:`raven_python.localization.predict_localization`."""
+
+    df: pd.DataFrame
+
+    def __post_init__(self) -> None:
+        idx = self.df.index
+        if not (isinstance(idx, pd.Index) and idx.name in (None, "gene_id")):
+            # Foreign index label: keep the data, relabel a private copy.
+            self.df = self.df.copy()
+            self.df.index.name = "gene_id"
+
+    @property
+    def genes(self) -> list[str]:
+        return [*self.df.index]
+
+    @property
+    def compartments(self) -> list[str]:
+        return [*self.df.columns]
+
+    def with_compartments(self, mapping: Mapping[str, str]) -> LocalizationScores:
+        """Return new scores with columns renamed via ``{old: new}``; unmapped columns
+        are kept. Several sources can be merged afterwards with ``df.combine_first``."""
+        renamed = self.df.rename(columns=dict(mapping))
+        return LocalizationScores(renamed)
+
+
+# ----------------------------------------------------------------------- WoLF PSORT
+
+# WoLF PSORT summary lines look like:
+#     PROTEIN_ID cyto 13, nucl 7, mito 4
+# with comments starting '#' and noisy 'treating ...' lines (which we drop).
+_WOLF_COMMA = re.compile(r"[,]\s*")
+
+
+def load_wolfpsort(path: str | Path) -> LocalizationScores:
+    """Read a WoLF PSORT summary file (``runWolfPsortSummary``) into row-normalised
+    :class:`LocalizationScores`. Blank, ``#``, ``treating`` and malformed lines are skipped."""
+    per_gene: dict[str, dict[str, float]] = {}
+    for raw in Path(path).read_text().splitlines():
+        text = raw.strip()
+        if not text or text.startswith("#") or "treating " in text:
+            continue
+        fields = _WOLF_COMMA.sub(" ", text).split()
+        if len(fields) < 3 or len(fields) % 2 == 0:
+            continue  # need a gene id followed by complete (compartment, score) pairs
+        gene, labels, values = fields[0], fields[1::2], fields[2::2]
+        parsed: dict[str, float] = {}
+        for label, value in zip(labels, values, strict=True):
+            try:
+                parsed[label] = float(value)
+            except ValueError:
+                continue
+        if parsed:
+            per_gene[gene] = parsed
+    table = pd.DataFrame.from_dict(per_gene, orient="index").fillna(0.0)
+    table.index.name = "gene_id"
+    return _normalise_rows(LocalizationScores(table))
+
+
+# ----------------------------------------------------------------------- DeepLoc
+
+def load_deeploc(path: str | Path) -> LocalizationScores:
+    """Read a DeepLoc 2 per-protein CSV into row-normalised :class:`LocalizationScores`.
+
+    The first column (``Protein_ID``) becomes the gene index, the next two
+    (``Localizations``, ``Signals``) are discarded, and every column from the fourth on
+    is taken as a per-compartment probability; non-numeric cells become 0.0.
+    """
+    raw = pd.read_csv(path)
+    if raw.shape[1] < 4:
+        raise ValueError(f"{path}: expected ≥4 columns from DeepLoc, got {list(raw.columns)}")
+    id_column, class_columns = raw.columns[0], list(raw.columns[3:])
+    numeric = raw.set_index(id_column)[class_columns].apply(pd.to_numeric, errors="coerce")
+    table = numeric.fillna(0.0)
+    table.index.name = "gene_id"
+    return _normalise_rows(LocalizationScores(table))
+
+
+# ----------------------------------------------------------------------- helpers
+
+def _normalise_rows(s: LocalizationScores) -> LocalizationScores:
+    """Scale each gene's row so its top compartment scores 1.0 (RAVEN ``parseScores``).
+
+    Rows without a positive maximum are passed through unscaled.
+    """
+    scaled = s.df.copy()
+    peak = scaled.max(axis=1)
+    positive = peak > 0
+    scaled.loc[positive] = scaled.loc[positive].div(peak[positive], axis=0)
+    return LocalizationScores(scaled)
diff --git a/src/raven_python/manipulation/__init__.py b/src/raven_python/manipulation/__init__.py
new file mode 100644
index 0000000..074c36f
--- /dev/null
+++ b/src/raven_python/manipulation/__init__.py
@@ -0,0 +1,36 @@
+"""Generic cobra.Model structural transforms that cobrapy does not cover cleanly:
+reaction building from equations, batch GPR / bound changes, irreversibility splitting,
+isozyme expansion, compartment merge / copy, and model merging by name."""
+from .add import add_reactions_from_equations
+from .change import change_gene_reaction_rules, change_reaction_equations
+from .expand import expand_model
+from .irreversible import convert_to_irreversible
+from .merge import merge_models
+from .parameters import set_variance_bounds
+from .remove import remove_genes, remove_metabolites
+from .simplify import (
+    constrain_reversible_reactions,
+    group_linear_reactions,
+    remove_dead_end_reactions,
+    remove_duplicate_reactions,
+)
+from .transfer import add_reactions_from_model
+from .transport import add_transport_reactions
+
+__all__ = [
+    "add_reactions_from_equations",
+    "add_reactions_from_model",
+    "add_transport_reactions",
+    "change_gene_reaction_rules",
+    "change_reaction_equations",
+    "constrain_reversible_reactions",
+    "convert_to_irreversible",
+    "expand_model",
+    "group_linear_reactions",
+    "merge_models",
+    "remove_dead_end_reactions",
+    "remove_duplicate_reactions",
+    "remove_genes",
+    "remove_metabolites",
+    "set_variance_bounds",
+]
diff --git a/src/raven_python/manipulation/add.py b/src/raven_python/manipulation/add.py
new file mode 100644
index 0000000..3842297
--- /dev/null
+++ b/src/raven_python/manipulation/add.py
@@ -0,0 +1,345 @@
+"""Add reactions to a model from equation strings.
+
+Most of the equivalent MATLAB code is struct-of-arrays bookkeeping (padding parallel
+``rxnNames`` / ``lb`` / ``ub`` / ``grRules`` / ... fields) that does not exist in
+cobra, where each ``Reaction`` carries its own attributes. cobra also already
+covers a large part of the *behaviour*:
+
+* ``Reaction.build_reaction_from_string`` parses equation strings, coefficients,
+  and reversibility arrows (``<=>``, ``-->``, ``=>``) and creates unknown
+  metabolites — but only matching metabolites **by ID**, and it leaves new
+  metabolites with ``compartment=None``.
+* assigning ``reaction.gene_reaction_rule`` auto-creates ``Gene`` objects.
+
+So this port keeps only the parts cobra lacks:
+
+* **name-based matching** — interpret equation tokens as metabolite *names*
+  (RAVEN eqnType 2) or as ``name[comp]`` (eqnType 3), not just IDs;
+* **correct compartment** assignment for newly created metabolites;
+* **strict policies** — optionally *error* (rather than silently create) on
+  unknown metabolites or genes, and always error on a duplicate reaction ID
+  (cobra silently ignores those).
+
+Instead of RAVEN's ``eqnType`` integer (1/2/3) the matching mode is a readable
+keyword: ``mets_by="id"`` or ``mets_by="name"``, with ``name[comp]`` recognised
+automatically. See IMPROVEMENTS.md (A-series) for the rationale.
+"""
+from __future__ import annotations
+
+import re
+import warnings
+from collections import OrderedDict
+from collections.abc import Mapping, Sequence
+
+import cobra
+from cobra import Metabolite, Reaction
+from cobra.core.gene import GPR
+
+from raven_python.utils.parse import parse_name_comp
+
+# Reversibility arrows. ``<=>`` must be tried before ``=>`` (it contains it).
+_REVERSIBLE_ARROWS = ("<=>",)
+_FORWARD_ARROWS = ("-->", "->", "=>")
+
+
+def _split_equation(equation: str) -> tuple[str, str, bool]:
+    """Cut ``equation`` at the highest-priority arrow it contains → (lhs, rhs, reversible)."""
+    # Reversible arrows are probed first because ``<=>`` contains ``=>``.
+    candidates = [(arrow, True) for arrow in _REVERSIBLE_ARROWS]
+    candidates += [(arrow, False) for arrow in _FORWARD_ARROWS]
+    for arrow, reversible in candidates:
+        left, found, right = equation.partition(arrow)
+        if found:
+            return left, right, reversible
+    # None of the supported arrows occurs in the equation.
+    raise ValueError(f"No reaction arrow (<=>, -->, =>) found in equation: {equation!r}")
+
+
+def _parse_side(side: str) -> list[tuple[float, str, str | None]]:
+    """Parse one side of an equation into ``[(coefficient, token, fallback), ...]``.
+
+    Terms are separated by ``" + "``; a leading finite number is the coefficient
+    (default 1.0). ``fallback`` covers the ambiguous ``"<number> <rest>"`` shape:
+    ``"2 oxoglutarate"`` is returned as ``(2.0, "oxoglutarate", "2 oxoglutarate")`` so
+    the resolver can retry the full term as one name. Heads that ``float`` reads as
+    ``nan``/``inf`` are treated as part of the name, never as a coefficient.
+    Raises :class:`ValueError` for a bare number with no metabolite after it.
+    """
+    terms: list[tuple[float, str, str | None]] = []
+    for raw in side.split(" + "):
+        term = raw.strip()
+        if not term:
+            continue
+        head, _, tail = term.partition(" ")
+        try:
+            coeff = float(head)
+        except ValueError:
+            coeff = float("nan")  # head is not numeric at all
+        if coeff != coeff or coeff in (float("inf"), float("-inf")):
+            # No usable coefficient: the whole term is the metabolite token.
+            coeff, token, fallback = 1.0, term, None
+        else:
+            token = tail.strip()
+            fallback = term if token else None  # full term kept for a name re-try
+        if not token:
+            raise ValueError(f"Missing metabolite after coefficient in term: {raw!r}")
+        terms.append((coeff, token, fallback))
+    return terms
+
+
+def _new_met_id(model: cobra.Model, prefix: str) -> str:
+    """Return an unused ``<prefix><int>`` metabolite ID (RAVEN m1, m2, ... scheme)."""
+    numbered = re.compile(rf"^{re.escape(prefix)}(\d+)$")
+    hits = filter(None, (numbered.match(met.id) for met in model.metabolites))
+    candidate = max((int(hit.group(1)) for hit in hits), default=0) + 1
+    while f"{prefix}{candidate}" in model.metabolites:
+        candidate += 1
+    return f"{prefix}{candidate}"
+
+
+def _try_existing(
+    model: cobra.Model, token: str, *, mets_by: str, compartment: str | None
+) -> Metabolite | None:
+    """Find the existing metabolite ``token`` refers to, or ``None`` — never creates one.
+
+    ``_stoichiometry`` uses this to settle the ``"<number> <rest>"`` ambiguity: when
+    the full term matches a metabolite, that match wins over reading the leading
+    number as a coefficient. The token is looked up by id when ``mets_by == "id"`` and
+    no compartment was parsed from it, otherwise by name within the compartment.
+    """
+    name, comp = parse_name_comp(token)
+    if mets_by == "id" and comp is None:
+        if token in model.metabolites:
+            return model.metabolites.get_by_id(token)
+        return None
+    wanted_comp = compartment if comp is None else comp
+    if wanted_comp is None:
+        return None
+    hits = (m for m in model.metabolites if m.name == name and m.compartment == wanted_comp)
+    return next(hits, None)
+
+
+def _resolve_metabolite(
+    model: cobra.Model,
+    token: str,
+    *,
+    mets_by: str,
+    compartment: str | None,
+    allow_new_mets: bool,
+    new_met_prefix: str,
+) -> Metabolite:
+    """Map an equation token onto a model metabolite, creating one when permitted."""
+    name, comp = parse_name_comp(token)
+    by_id = mets_by == "id" and comp is None
+
+    if by_id:
+        # The token is itself the metabolite ID.
+        if token in model.metabolites:
+            return model.metabolites.get_by_id(token)
+        if not allow_new_mets:
+            raise ValueError(
+                f"Unknown metabolite ID {token!r}; pass allow_new_mets=True to create it."
+            )
+        if compartment is None:
+            raise ValueError(
+                f"Cannot create metabolite {token!r}: no compartment given."
+            )
+        _warn_unknown_compartment(model, compartment, token)
+        created = Metabolite(token, compartment=compartment)
+        model.add_metabolites([created])
+        return created
+
+    # Otherwise match on (name, compartment); an explicit name[comp] beats the default.
+    wanted_comp = compartment if comp is None else comp
+    if wanted_comp is None:
+        raise ValueError(
+            f"Metabolite {token!r} matched by name needs a compartment; "
+            "pass compartment=... or use the name[comp] syntax."
+        )
+    comp_is_new = comp is not None and wanted_comp not in model.compartments
+    if comp_is_new and not allow_new_mets:
+        raise ValueError(f"Compartment {wanted_comp!r} is not in the model.")
+
+    # The first existing metabolite with that name in that compartment wins.
+    for candidate in model.metabolites:
+        if candidate.name == name and candidate.compartment == wanted_comp:
+            return candidate
+    if not allow_new_mets:
+        raise ValueError(
+            f"No metabolite named {name!r} in compartment {wanted_comp!r}; "
+            "pass allow_new_mets=True to create it."
+        )
+    # Nothing matched and creation is allowed: mint a fresh ID under the prefix.
+    _warn_unknown_compartment(model, wanted_comp, name)
+    created = Metabolite(_new_met_id(model, new_met_prefix), name=name, compartment=wanted_comp)
+    model.add_metabolites([created])
+    return created
+
+
+def _warn_unknown_compartment(model: cobra.Model, compartment: str, identifier: str) -> None:
+    """Emit a warning if ``compartment`` is not yet known to ``model``.
+
+    Guards against typos (``"cyto"`` for ``"c"``) that would otherwise quietly create a
+    one-metabolite ghost compartment; warns rather than raises.
+    """
+    registered = set(model.compartments).union(model._compartments)
+    if compartment in registered:
+        return
+    listing = sorted(registered) or "none"
+    warnings.warn(
+        f"Creating metabolite {identifier!r} in unregistered compartment "
+        f"{compartment!r} (existing: {listing}); "
+        "add the compartment first or check for a typo.",
+        stacklevel=5,
+    )
+
+
+def _stoichiometry(
+    model: cobra.Model,
+    equation: str,
+    *,
+    mets_by: str,
+    compartment: str | None,
+    allow_new_mets: bool,
+    new_met_prefix: str,
+) -> tuple[dict[Metabolite, float], bool]:
+    """Turn an equation string into net stoichiometry plus a reversibility flag.
+
+    Left-hand terms contribute negative coefficients and right-hand terms
+    positive ones. A metabolite that appears several times is summed, and
+    metabolites whose net coefficient is exactly zero are left out of the
+    result. A warning is issued when the equation had terms but every one of
+    them cancelled.
+
+    Returns
+    -------
+    tuple
+        ``({Metabolite: net coefficient}, reversible)``; metabolites are kept
+        in order of first appearance.
+    """
+    lhs, rhs, reversible = _split_equation(equation)
+    net: dict[Metabolite, float] = {}
+    saw_term = False
+    for side, sign in ((lhs, -1.0), (rhs, 1.0)):
+        for coeff, token, fallback in _parse_side(side):
+            saw_term = True
+            # A term like "2 oxoglutarate" can mean "2 x oxoglutarate" or the
+            # metabolite literally called "2 oxoglutarate". When the parser
+            # offers the whole-term reading (``fallback``) and it names an
+            # existing metabolite, that reading wins with coefficient 1.
+            met = (
+                _try_existing(model, fallback, mets_by=mets_by, compartment=compartment)
+                if fallback is not None
+                else None
+            )
+            if met is not None:
+                coeff = 1.0
+            else:
+                # Otherwise use the coefficient/name split as parsed.
+                met = _resolve_metabolite(
+                    model,
+                    token,
+                    mets_by=mets_by,
+                    compartment=compartment,
+                    allow_new_mets=allow_new_mets,
+                    new_met_prefix=new_met_prefix,
+                )
+            net[met] = net.get(met, 0.0) + sign * coeff
+    # A metabolite present as both substrate and product may net to zero.
+    remaining = {met: value for met, value in net.items() if value != 0.0}
+    if saw_term and not remaining:
+        warnings.warn(
+            f"Equation {equation!r} has no net metabolites (all terms cancelled); "
+            "the reaction will be added with empty stoichiometry.",
+            stacklevel=4,
+        )
+    return remaining, reversible
+
+
+def add_reactions_from_equations(
+    model: cobra.Model,
+    reactions: Sequence[Mapping],
+    *,
+    mets_by: str = "id",
+    compartment: str | None = None,
+    allow_new_mets: bool = True,
+    allow_new_genes: bool = True,
+    new_met_prefix: str = "m",
+) -> list[Reaction]:
+    """Add reactions defined by equation strings, matching mets by ID or name.
+
+    Parameters
+    ----------
+    model
+        Target ``cobra.Model``, mutated in place.
+    reactions
+        Sequence of mappings, one per reaction. Recognised keys:
+
+        * ``id`` (**required**) — reaction ID; must not already exist.
+        * ``equation`` (**required**) — e.g. ``"atp_c + h2o_c <=> adp_c + pi_c"``.
+          Use ``<=>`` for reversible, ``-->``/``->``/``=>`` for irreversible.
+        * ``name`` — reaction name.
+        * ``bounds`` — ``(lower, upper)`` tuple; overrides the arrow.
+        * ``gene_reaction_rule`` — GPR string.
+        * ``subsystem`` — subsystem name.
+    mets_by
+        How bare equation tokens (without ``[comp]``) are matched:
+        ``"id"`` (RAVEN eqnType 1) or ``"name"`` (eqnType 2). A ``name[comp]``
+        token (eqnType 3) is always matched by name + compartment.
+    compartment
+        Default compartment for new metabolites and for name-matched tokens
+        without an explicit ``[comp]``.
+    allow_new_mets
+        If True (default), create metabolites not found. New metabolites get
+        ``compartment`` (id mode) or an auto ID ``m1``, ``m2``, ... (name mode).
+        If False, an unknown metabolite raises.
+    allow_new_genes
+        If True (default), genes in a GPR are auto-created by cobra. If False,
+        a GPR referencing a gene not already in the model raises.
+    new_met_prefix
+        Prefix for auto-generated metabolite IDs in name mode (default ``"m"``).
+
+    Returns
+    -------
+    list of cobra.Reaction
+        The reactions added, in input order.
+
+    Raises
+    ------
+    ValueError
+        If ``mets_by`` is invalid, a spec lacks ``id`` or ``equation``, the
+        reaction ID already exists, or (with ``allow_new_genes=False``) the GPR
+        references a gene that is not in the model. Errors raised while parsing
+        the equation propagate unchanged.
+
+    Notes
+    -----
+    Specs are processed in order and the model is mutated as each one succeeds,
+    so reactions added before a failing spec stay in the model. Within one
+    spec, the ``id``, ``equation`` and GPR checks all run before that spec's
+    equation is parsed, so a rejected GPR never leaves a half-added reaction
+    or freshly created metabolites behind.
+    """
+    if mets_by not in ("id", "name"):
+        raise ValueError(f"mets_by must be 'id' or 'name', got {mets_by!r}")
+
+    known_genes = {gene.id for gene in model.genes}
+    added: list[Reaction] = []
+
+    for spec in reactions:
+        if "id" not in spec:
+            raise ValueError(f"Reaction spec missing required 'id': {spec!r}")
+        rxn_id = spec["id"]
+        if rxn_id in model.reactions:
+            raise ValueError(
+                f"Reaction {rxn_id!r} already exists; use changeRxns or remove it first."
+            )
+        if "equation" not in spec:
+            raise ValueError(f"Reaction {rxn_id!r} spec missing required 'equation'.")
+
+        # Check the GPR before anything is created for this spec: doing it
+        # after ``model.add_reactions`` would leave the rejected reaction (and
+        # any new metabolites) in the model when the ValueError is raised.
+        rule = spec.get("gene_reaction_rule", "")
+        if rule and not allow_new_genes:
+            missing = sorted(set(GPR.from_string(rule).genes) - known_genes)
+            if missing:
+                raise ValueError(
+                    f"Reaction {rxn_id!r} references genes not in the model: "
+                    f"{missing}. Set allow_new_genes=True or add them first."
+                )
+
+        coeffs, reversible = _stoichiometry(
+            model,
+            spec["equation"],
+            mets_by=mets_by,
+            compartment=compartment,
+            allow_new_mets=allow_new_mets,
+            new_met_prefix=new_met_prefix,
+        )
+
+        rxn = Reaction(rxn_id, name=spec.get("name", ""))
+        if "bounds" in spec:
+            # Explicit bounds take precedence over the arrow in the equation.
+            rxn.bounds = tuple(spec["bounds"])
+        else:
+            config = cobra.Configuration()
+            lower = config.lower_bound if reversible else 0.0
+            rxn.bounds = (lower, config.upper_bound)
+        if "subsystem" in spec:
+            rxn.subsystem = spec["subsystem"]
+
+        model.add_reactions([rxn])
+        rxn.add_metabolites(coeffs)
+
+        if rule:
+            rxn.gene_reaction_rule = rule
+            # Genes created by this rule count as known for later specs.
+            known_genes.update(gene.id for gene in rxn.genes)
+
+        added.append(rxn)
+
+    return added
diff --git a/src/raven_python/manipulation/change.py b/src/raven_python/manipulation/change.py
new file mode 100644
index 0000000..78612ba
--- /dev/null
+++ b/src/raven_python/manipulation/change.py
@@ -0,0 +1,125 @@
+"""Change the stoichiometry of existing reactions from equation strings.
+
+Editing a ``Reaction`` object in place changes only its stoichiometry — its id, name,
+bounds, GPR, subsystem, and position in the model are preserved automatically by cobra.
+
+So this port simply re-parses the equation (reusing the same metabolite
+matching as :func:`~raven_python.manipulation.add.add_reactions_from_equations`,
+including name and ``name[comp]`` modes that cobra lacks) and swaps the
+metabolites in place.
+
+Like RAVEN, **bounds are left unchanged** even if the new equation's arrow
+implies a different reversibility — use a bounds setter for that.
+"""
+from __future__ import annotations
+
+from collections.abc import Mapping
+
+import cobra
+from cobra import Reaction
+
+from raven_python.manipulation.add import _stoichiometry
+
+__all__ = ["change_reaction_equations", "change_gene_reaction_rules"]
+
+
+def change_reaction_equations(
+    model: cobra.Model,
+    equations: Mapping[str, str],
+    *,
+    mets_by: str = "id",
+    compartment: str | None = None,
+    allow_new_mets: bool = True,
+    new_met_prefix: str = "m",
+) -> list[Reaction]:
+    """Replace the stoichiometry of existing reactions.
+
+    Parameters
+    ----------
+    model
+        Target ``cobra.Model``, mutated in place.
+    equations
+        Mapping of ``reaction_id -> equation string``. Every ID must already
+        exist in the model. Equation syntax is identical to
+        :func:`~raven_python.manipulation.add.add_reactions_from_equations`.
+    mets_by, compartment, allow_new_mets, new_met_prefix
+        Metabolite-matching options, as in ``add_reactions_from_equations``.
+
+    Returns
+    -------
+    list of cobra.Reaction
+        The reactions changed, in input order.
+
+    Raises
+    ------
+    ValueError
+        If ``mets_by`` is invalid or a reaction ID is not in the model. Errors
+        raised while parsing an equation propagate unchanged.
+
+    Notes
+    -----
+    Bounds are **not** modified, matching RAVEN. Changing an equation from
+    ``-->`` to ``<=>`` does not by itself make the reaction reversible; adjust
+    the bounds separately.
+
+    All reaction IDs are checked and all equations parsed before any reaction
+    is edited, so an unknown ID or a bad equation late in ``equations`` does
+    not leave earlier reactions already rewritten. Metabolites created while
+    parsing (``allow_new_mets=True``) are not rolled back on failure.
+    """
+    if mets_by not in ("id", "name"):
+        raise ValueError(f"mets_by must be 'id' or 'name', got {mets_by!r}")
+
+    # Phase 1: resolve every target up front so a typo in a later ID cannot
+    # leave the model half-edited.
+    targets: list[Reaction] = []
+    for rxn_id in equations:
+        if rxn_id not in model.reactions:
+            raise ValueError(f"Reaction {rxn_id!r} not found in the model.")
+        targets.append(model.reactions.get_by_id(rxn_id))
+
+    # Phase 2: parse every equation. An explicit loop (not a comprehension)
+    # keeps the call depth seen by the helper's ``warnings.warn`` the same on
+    # every Python version.
+    parsed: list[dict] = []
+    for equation in equations.values():
+        coeffs, _reversible = _stoichiometry(
+            model,
+            equation,
+            mets_by=mets_by,
+            compartment=compartment,
+            allow_new_mets=allow_new_mets,
+            new_met_prefix=new_met_prefix,
+        )
+        parsed.append(coeffs)
+
+    # Phase 3: swap the stoichiometry in place. Subtracting the reaction's own
+    # metabolites empties it; the new coefficients are then added.
+    for rxn, coeffs in zip(targets, parsed):
+        rxn.subtract_metabolites(dict(rxn.metabolites), combine=True)
+        rxn.add_metabolites(coeffs)
+
+    return targets
+
+
+def change_gene_reaction_rules(
+    model: cobra.Model,
+    rules: Mapping[str, str],
+    *,
+    replace: bool = True,
+) -> list[Reaction]:
+    """Set or append gene-reaction rules on existing reactions.
+
+    cobra already does the heavy lifting on assignment to
+    ``reaction.gene_reaction_rule``: it auto-creates any new ``Gene`` objects and
+    normalises the rule. So the value here is batching plus RAVEN's ``replace``
+    option to **append** rather than overwrite.
+
+    Parameters
+    ----------
+    model
+        Target ``cobra.Model``, mutated in place.
+    rules
+        Mapping of ``reaction_id -> GPR string``. Every ID must already exist.
+    replace
+        If True (default), overwrite the existing GPR. If False, append the new
+        rule as an isozyme: ``(old) or (new)`` (just ``new`` if the reaction had
+        no GPR). Appending an empty rule leaves the existing GPR unchanged.
+
+    Returns
+    -------
+    list of cobra.Reaction
+        The reactions changed, in input order.
+
+    Raises
+    ------
+    ValueError
+        If a reaction ID is not in the model. All IDs are checked before any
+        rule is assigned, so a failure leaves the model untouched.
+    """
+    # Resolve every target first so an unknown ID cannot leave earlier
+    # reactions already modified.
+    targets: list[Reaction] = []
+    for rxn_id in rules:
+        if rxn_id not in model.reactions:
+            raise ValueError(f"Reaction {rxn_id!r} not found in the model.")
+        targets.append(model.reactions.get_by_id(rxn_id))
+
+    for rxn, rule in zip(targets, rules.values()):
+        current = rxn.gene_reaction_rule
+        if replace or not current:
+            new_rule = rule
+        elif not (rule or "").strip():
+            # Nothing to append: "(old) or ()" would not be a valid GPR.
+            new_rule = current
+        else:
+            new_rule = f"({current}) or ({rule})"
+
+        rxn.gene_reaction_rule = new_rule  # cobra creates genes + normalises
+
+    return targets
diff --git a/src/raven_python/manipulation/compartments.py b/src/raven_python/manipulation/compartments.py
new file mode 100644
index 0000000..091d196
--- /dev/null
+++ b/src/raven_python/manipulation/compartments.py
@@ -0,0 +1,196 @@
+"""Compartment manipulation — merge all compartments into one, or copy reactions to a
+new compartment (ports of RAVEN's ``mergeCompartments`` and ``copyToComps``).
+
+Both functions are useful **independently of** :func:`raven_python.localization.predict_localization`:
+``merge_compartments`` flattens a multi-compartment model for a simplified analysis
+(e.g. checking whether the network can in principle make a metabolite, with no
+compartment topology in the way); ``copy_to_compartment`` is a building block for
+constructing dual-localised pathways. cobra has no equivalents.
+"""
+from __future__ import annotations
+
+from collections.abc import Iterable
+
+import cobra
+
+# Default id of the single compartment produced by merge_compartments
+# (RAVEN uses 's' for "system").
+_MERGED_COMPARTMENT = "s"
+
+
+def merge_compartments(
+    model: cobra.Model,
+    *,
+    merged_id: str = _MERGED_COMPARTMENT,
+    merged_name: str = "system",
+    drop_single_metabolite_reactions: bool = True,
+    deduplicate_reactions: bool = True,
+) -> tuple[cobra.Model, list[str], list[str]]:
+    """Merge every metabolite of ``model`` into one ``merged_id`` compartment.
+
+    Returns ``(merged_model, deleted_single_met_reactions, deduplicated_reactions)``.
+    The returned model is rebuilt from a copy of the input, which is left
+    untouched. Use cases:
+
+    * Check whether the network can produce/consume a metabolite at all (compartment
+      topology is often what makes a model look blocked).
+    * Simplify a model for visualisation or an analysis that doesn't care about
+      compartments.
+    * As a pre-step for localisation when the user does want RAVEN's
+      "start from scratch" workflow (call :func:`merge_compartments` then
+      :func:`raven_python.localization.predict_localization` with the full reaction list).
+
+    Metabolites that already share a base id (e.g. ``glc__D_c`` and ``glc__D_e`` both
+    map to ``glc__D``) collapse into one entity in the merged compartment; their
+    stoichiometric contributions are summed per reaction. The first metabolite
+    seen for a base id supplies the name, formula, charge, notes and annotation.
+    Reactions that end up with only one metabolite (e.g. ``A[c] → A[m]`` becomes
+    ``A → A`` = nothing) are deleted by default (RAVEN's ``deleteRxnsWithOneMet``);
+    reactions whose stoichiometry cancels completely are always deleted. Reactions
+    that become identical after merging (same net stoichiometry and the same
+    allowed directions) are deduplicated: the first one survives.
+
+    Reaction notes, annotations, subsystem, bounds and GPR are carried over, and
+    so are the objective direction and the objective coefficients of surviving
+    reactions. The objective contribution of a dropped or deduplicated reaction
+    is not transferred.
+    """
+    out = model.copy()
+
+    # 1. One canonical metabolite per base id (the id with its trailing
+    #    _<compartment> removed); the first metabolite seen is the prototype.
+    canonical: dict[str, cobra.Metabolite] = {}
+    for m in out.metabolites:
+        base = _base_id(m)
+        if base in canonical:
+            continue
+        new_met = cobra.Metabolite(base, name=m.name, compartment=merged_id,
+                                    formula=m.formula, charge=m.charge)
+        # ``out`` is a private copy, so shallow copies cannot alias the caller's model.
+        new_met.notes = dict(m.notes or {})
+        new_met.annotation = dict(m.annotation or {})
+        canonical[base] = new_met
+
+    # 2. Build a fresh model from the canonical metabolites; rewriting the copy
+    #    in place would need careful constraint surgery, so a clean rebuild is
+    #    simpler and less error-prone.
+    merged = cobra.Model(out.id or "merged")
+    merged.compartments = {merged_id: merged_name}
+    merged.add_metabolites(list(canonical.values()))
+
+    deleted_single: list[str] = []
+    deduplicated: list[str] = []
+    seen_signatures: dict[tuple, str] = {}
+    keep_reactions: list[cobra.Reaction] = []
+    kept_objective: dict[str, float] = {}
+    for r in out.reactions:
+        # Net coefficient per canonical metabolite id; a substrate and product
+        # that share a base id cancel.
+        stoich: dict[str, float] = {}
+        for m, coeff in r.metabolites.items():
+            base = _base_id(m)
+            stoich[base] = stoich.get(base, 0.0) + coeff
+        stoich = {mid: c for mid, c in stoich.items() if c != 0.0}
+
+        if drop_single_metabolite_reactions and len(stoich) <= 1:
+            deleted_single.append(r.id)
+            continue
+        if not stoich:  # everything cancelled
+            deleted_single.append(r.id)
+            continue
+        sig = (frozenset(stoich.items()), bool(r.lower_bound < 0), bool(r.upper_bound > 0))
+        if deduplicate_reactions and sig in seen_signatures:
+            deduplicated.append(r.id)
+            continue
+        seen_signatures[sig] = r.id
+        new_r = cobra.Reaction(r.id, name=r.name, lower_bound=r.lower_bound,
+                                upper_bound=r.upper_bound)
+        new_r.add_metabolites({merged.metabolites.get_by_id(mid): c for mid, c in stoich.items()})
+        new_r.gene_reaction_rule = r.gene_reaction_rule
+        if r.subsystem:
+            new_r.subsystem = r.subsystem
+        new_r.notes = dict(r.notes or {})
+        new_r.annotation = dict(r.annotation or {})
+        keep_reactions.append(new_r)
+        obj_coeff = r.objective_coefficient
+        if obj_coeff:
+            kept_objective[r.id] = obj_coeff
+    merged.add_reactions(keep_reactions)
+
+    # 3. Re-create the objective on the rebuilt model; coefficients can only be
+    #    assigned once the reactions belong to a model.
+    for rid, obj_coeff in kept_objective.items():
+        merged.reactions.get_by_id(rid).objective_coefficient = obj_coeff
+    merged.objective_direction = out.objective_direction
+    return merged, deleted_single, deduplicated
+
+
+def copy_to_compartment(
+    model: cobra.Model,
+    reactions: Iterable[str],
+    target_compartment: str,
+    *,
+    target_compartment_name: str | None = None,
+    delete_original: bool = False,
+    id_suffix: str | None = None,
+) -> tuple[cobra.Model, list[str], list[str]]:
+    """Copy a set of reactions into ``target_compartment``. RAVEN's ``copyToComps``.
+
+    Returns ``(model_copy, new_reaction_ids, new_metabolite_ids)``; the input
+    model is not modified. Use cases:
+
+    * Build a dual-localised pathway (e.g. duplicate glycolysis into a peroxisome).
+    * Mirror a curated subsystem into an additional compartment as a draft to refine.
+    * Set up the input for a flux comparison between alternate compartmentalisations.
+
+    Each copied reaction is given the id ``"<orig_id>_<id_suffix>"`` (default
+    ``id_suffix=target_compartment``); each metabolite it touches is mapped to (or
+    created in) ``target_compartment`` with the same suffix convention. When two
+    metabolites of one reaction map to the same target metabolite (e.g. copying
+    the transport ``A_c -> A_m`` into ``m``), their coefficients are summed and a
+    net-zero metabolite is dropped from the copy. ``delete_original=True``
+    moves the reactions instead of copying. A reaction whose copy id already
+    exists is skipped entirely (it is neither copied again nor deleted).
+
+    Raises
+    ------
+    ValueError
+        If a reaction id is not in the model.
+    """
+    out = model.copy()
+    suffix = id_suffix if id_suffix is not None else target_compartment
+    if target_compartment not in out.compartments:
+        out.compartments = {**out.compartments,
+                             target_compartment: target_compartment_name or target_compartment}
+
+    # Snapshot taken before copying so newly created metabolites can be reported.
+    preexisting_met_ids = {x.id for x in out.metabolites}
+    new_rxn_ids: list[str] = []
+    for rid in list(reactions):
+        if rid not in out.reactions:
+            raise ValueError(f"reaction {rid!r} not in model")
+        src = out.reactions.get_by_id(rid)
+        new_id = f"{rid}_{suffix}"
+        if new_id in out.reactions:
+            continue  # already copied; idempotent
+        new_stoich: dict[cobra.Metabolite, float] = {}
+        for m, coeff in src.metabolites.items():
+            target_met = _met_in_compartment(out, m, target_compartment, suffix=suffix)
+            # Accumulate rather than assign: distinct source metabolites can
+            # land on the same target, and overwriting would silently keep
+            # only the last coefficient.
+            new_stoich[target_met] = new_stoich.get(target_met, 0.0) + coeff
+        new_stoich = {m: c for m, c in new_stoich.items() if c != 0.0}
+        new_r = cobra.Reaction(new_id, name=src.name,
+                                lower_bound=src.lower_bound, upper_bound=src.upper_bound)
+        new_r.add_metabolites(new_stoich)
+        new_r.gene_reaction_rule = src.gene_reaction_rule
+        if src.subsystem:
+            new_r.subsystem = src.subsystem
+        new_r.notes = dict(src.notes or {})
+        out.add_reactions([new_r])
+        new_rxn_ids.append(new_id)
+        if delete_original:
+            out.remove_reactions([src.id], remove_orphans=False)
+
+    new_met_ids = [m.id for m in out.metabolites if m.id not in preexisting_met_ids]
+    return out, new_rxn_ids, new_met_ids
+
+
+# ----------------------------------------------------------------- helpers
+
+def _base_id(m: cobra.Metabolite) -> str:
+    """Return the metabolite id without its trailing ``_<compartment>`` tag.
+
+    The id comes back unchanged when the metabolite has no compartment or when
+    its id does not end in ``_<compartment>``.
+    """
+    compartment = m.compartment
+    if not compartment:
+        return m.id
+    return m.id.removesuffix(f"_{compartment}")
+
+
+def _met_in_compartment(model: cobra.Model, source: cobra.Metabolite,
+                        compartment: str, *, suffix: str | None = None) -> cobra.Metabolite:
+    """Fetch, or create on demand, the counterpart of ``source`` in ``compartment``.
+
+    A metabolite that already lives in ``compartment`` is returned as is.
+    Otherwise the counterpart's id is ``"<base>_<suffix>"`` (``suffix`` falls
+    back to ``compartment``); a metabolite already registered under that id is
+    reused, and if there is none a new one is added to ``model`` with the
+    source's name, formula, charge and a shallow copy of its notes.
+    """
+    if source.compartment == compartment:
+        return source
+    tag = compartment if suffix is None else suffix
+    target_id = f"{_base_id(source)}_{tag}"
+    if target_id not in model.metabolites:
+        twin = cobra.Metabolite(target_id, name=source.name, compartment=compartment,
+                                formula=source.formula, charge=source.charge)
+        twin.notes = dict(source.notes or {})
+        model.add_metabolites([twin])
+        return twin
+    return model.metabolites.get_by_id(target_id)
diff --git a/src/raven_python/manipulation/expand.py b/src/raven_python/manipulation/expand.py
new file mode 100644
index 0000000..246f3b9
--- /dev/null
+++ b/src/raven_python/manipulation/expand.py
@@ -0,0 +1,124 @@
+"""Expand reactions with isozymes into one reaction per isozyme.
+
+Operates on cobra's GPR AST, so the model stays a plain ``cobra.Model`` throughout.
+
+Provenance: this implementation was first written for geckopy
+(``geckopy/ec_model/pipeline/expand.py``, where it backed makeEcModel stage 5)
+and is adopted here as its canonical home; geckopy will import it from raven_python
+once raven_python is published.
+
+MATLAB-COMPAT: GECKO MATLAB and RAVEN ``expandModel.m`` use string manipulation
+on grRules to detect and split isozymes. raven_python uses cobrapy's GPR AST
+instead. Output should be equivalent for any well-formed GPR; cases that differ
+are likely malformed GPR strings that the AST flags as invalid.
+"""
+from __future__ import annotations
+
+import ast
+import copy
+
+import cobra
+from cobra.core.gene import GPR
+
+
+def _gpr_to_dnf(gpr: GPR) -> list[list[str]]:
+    """Express a GPR in disjunctive normal form: a list of AND-clauses.
+
+    Each inner list holds the gene names that are ANDed together; the outer
+    list is the OR over those clauses. A missing or empty GPR gives ``[]``,
+    and a GPR without any OR gives exactly one clause.
+
+    AND is distributed over OR, e.g. ``g1 and (g2 or g3)`` gives
+    ``[[g1, g2], [g1, g3]]``.
+    """
+    has_body = gpr is not None and gpr.body is not None
+    return _node_to_dnf(gpr.body) if has_body else []
+
+
+def _node_to_dnf(node) -> list[list[str]]:
+    """Recursively convert one GPR AST node to DNF (a list of AND-clauses).
+
+    * A name is a single one-gene clause.
+    * An OR concatenates the clauses of its operands.
+    * An AND is the cross product of its operands' clauses, each combination
+      joined left to right.
+
+    Raises
+    ------
+    ValueError
+        For any other node type.
+    """
+    if isinstance(node, ast.Name):
+        return [[node.id]]
+    if isinstance(node, ast.BoolOp):
+        if isinstance(node.op, ast.Or):
+            return [clause for child in node.values for clause in _node_to_dnf(child)]
+        if isinstance(node.op, ast.And):
+            # Start from the single empty clause and extend it operand by operand.
+            product: list[list[str]] = [[]]
+            for child in node.values:
+                options = _node_to_dnf(child)
+                product = [head + tail for head in product for tail in options]
+            return product
+    raise ValueError(f"Unexpected GPR node type: {type(node).__name__}")
+
+
+def expand_model(model: cobra.Model) -> list[str]:
+    """Replace each isozyme reaction (OR in its GPR) by one reaction per isozyme.
+
+    A reaction whose GPR expands to more than one AND-clause is removed and
+    one copy per clause is added in its place, with IDs ``<id>_EXP_1``,
+    ``<id>_EXP_2``, ... Each copy keeps the stoichiometry, bounds, name,
+    subsystem, annotation and notes of the original; only the GPR is reduced
+    to the AND-clause of that isozyme. If the original had a non-zero
+    objective coefficient, every copy receives the same coefficient.
+
+    Reactions without a GPR, or whose GPR contains no OR, are left as they are.
+
+    Parameters
+    ----------
+    model
+        A cobra.Model, mutated in place.
+
+    Returns
+    -------
+    list of str
+        Sorted IDs of the newly added ``_EXP_N`` reactions. The reactions
+        that were split are no longer in the model.
+    """
+    # Decide what to split before touching the model, so the reaction list is
+    # not modified while it is being iterated.
+    to_split: list[tuple[cobra.Reaction, list[list[str]]]] = []
+    for rxn in model.reactions:
+        if rxn.gene_reaction_rule:
+            dnf = _gpr_to_dnf(rxn.gpr)
+            if len(dnf) > 1:
+                to_split.append((rxn, dnf))
+
+    new_ids: list[str] = []
+    for parent, dnf in to_split:
+        copies: list[cobra.Reaction] = []
+        for index, genes in enumerate(dnf, start=1):
+            child = cobra.Reaction(
+                id=f"{parent.id}_EXP_{index}",
+                name=parent.name,
+            )
+            child.lower_bound = parent.lower_bound
+            child.upper_bound = parent.upper_bound
+            child.add_metabolites(dict(parent.metabolites.items()))
+            child.subsystem = parent.subsystem
+            child.gene_reaction_rule = " and ".join(genes)
+            # Per-reaction metadata (e.g. ec-code annotations) travels with
+            # each copy; deep copies keep the siblings independent.
+            child.annotation = copy.deepcopy(parent.annotation)
+            child.notes = copy.deepcopy(parent.notes)
+            copies.append(child)
+
+        # Read the coefficient while the parent still belongs to the model.
+        parent_coeff = parent.objective_coefficient
+        model.remove_reactions([parent])
+        model.add_reactions(copies)
+        if parent_coeff:
+            # The objective then sums over all isozyme copies.
+            for child in copies:
+                child.objective_coefficient = parent_coeff
+        new_ids += [child.id for child in copies]
+
+    return sorted(new_ids)
diff --git a/src/raven_python/manipulation/irreversible.py b/src/raven_python/manipulation/irreversible.py
new file mode 100644
index 0000000..3f64a68
--- /dev/null
+++ b/src/raven_python/manipulation/irreversible.py
@@ -0,0 +1,72 @@
+"""Convert reversible reactions to an irreversible (forward + reverse) form.
+
+cobrapy's own ``convert_to_irreversible`` was removed, so this is a genuine
+implementation rather than a wrapper.
+
+Provenance: first written for geckopy
+(``geckopy/ec_model/pipeline/preprocess.py``, makeEcModel stage 4, tagged
+"RAVENpy candidate") and adopted here as its canonical home; geckopy will
+import it from raven_python once raven_python is published.
+"""
+from __future__ import annotations
+
+import cobra
+
+
+def convert_to_irreversible(model: cobra.Model) -> list[str]:
+    """Split non-exchange reversible reactions into a forward + reverse pair.
+
+    For each non-exchange reaction with ``lb < 0``:
+
+    - The original reaction is kept as the forward direction. Its
+      bounds become ``(0, max(ub, 0))``.
+    - A new reaction with the same ID plus a ``_REV`` suffix is added,
+      representing the reverse direction. Its stoichiometry is the
+      negation of the original, its bounds are ``(max(-ub, 0), -lb)``,
+      and it inherits the name (with " (reversible)" appended) and the
+      gene-protein rule of the original.
+
+    For the usual case ``ub >= 0`` this is simply forward ``(0, ub)`` and
+    reverse ``(0, -lb)``. A reaction forced to run backwards (``ub < 0``,
+    e.g. bounds ``(-10, -2)``) becomes forward ``(0, 0)`` and reverse
+    ``(2, 10)``, so the forced flux is preserved.
+
+    Exchange reactions (boundary reactions) are never split, regardless
+    of their bounds, matching MATLAB behavior where exchange reactions
+    are explicitly excluded from ``convertToIrrev``.
+
+    Parameters
+    ----------
+    model
+        A cobra.Model, mutated in place.
+
+    Returns
+    -------
+    list of str
+        Sorted IDs of newly added reverse reactions (the ones ending in
+        ``_REV``). The forward reactions retain their original IDs.
+    """
+    reverse_rxns_to_add: list[cobra.Reaction] = []
+    forward_updates: list[tuple[cobra.Reaction, float]] = []
+
+    for rxn in model.reactions:
+        if rxn.boundary or rxn.lower_bound >= 0:
+            continue
+
+        original_lb, original_ub = rxn.lower_bound, rxn.upper_bound
+
+        rev_rxn = cobra.Reaction(
+            id=f"{rxn.id}_REV",
+            name=(f"{rxn.name} (reversible)" if rxn.name else f"{rxn.id}_REV"),
+        )
+        # The reverse reaction carries the negative part of the flux range,
+        # mirrored. Both bounds are assigned together so the pair is checked
+        # as a whole rather than against the new reaction's default bounds.
+        rev_rxn.bounds = (max(0.0, -original_ub), -original_lb)
+        rev_rxn.add_metabolites({m: -c for m, c in rxn.metabolites.items()})
+        rev_rxn.gene_reaction_rule = rxn.gene_reaction_rule
+
+        reverse_rxns_to_add.append(rev_rxn)
+        forward_updates.append((rxn, original_ub))
+
+    for rxn, original_ub in forward_updates:
+        # Only raising the lower bound to 0 would leave lb > ub for a reaction
+        # with a negative upper bound; clamp the upper bound as well.
+        rxn.bounds = (0.0, max(0.0, original_ub))
+
+    if reverse_rxns_to_add:
+        model.add_reactions(reverse_rxns_to_add)
+
+    return sorted(r.id for r in reverse_rxns_to_add)
diff --git a/src/raven_python/manipulation/merge.py b/src/raven_python/manipulation/merge.py
new file mode 100644
index 0000000..bfa1f24
--- /dev/null
+++ b/src/raven_python/manipulation/merge.py
@@ -0,0 +1,146 @@
+"""Merge several models into one.
+
+cobra's ``Model.merge`` is pairwise and matches everything strictly by id; this
+merges **N** models and unifies metabolites by **name[compartment]** (so the same
+compound under different ids in two models becomes one), while adding **all**
+reactions without de-duplication
+(a reaction whose ID already exists is renamed ``id_<sourceid>``). Genes are
+unified by ID. Provenance (which source model each object came from) is recorded
+in ``notes['origin']``.
+
+The bulk of RAVEN's function is struct field-padding and manual S-matrix
+assembly, none of which is needed on ``cobra.Model``.
+"""
+from __future__ import annotations
+
+import copy
+import warnings
+from collections.abc import Iterable
+
+import cobra
+from cobra import Metabolite, Model, Reaction
+
+
+def _unique_id(existing, base: str, suffix: str) -> str:
+    """Pick an id that is not yet in ``existing``.
+
+    Tries ``base`` first, then ``base_suffix``, then ``base_suffix_2``,
+    ``base_suffix_3``, ... and returns the first one that is free.
+    ``existing`` only needs to support the ``in`` operator.
+    """
+    if base not in existing:
+        return base
+    stem = f"{base}_{suffix}"
+    candidate, counter = stem, 2
+    while candidate in existing:
+        candidate = f"{stem}_{counter}"
+        counter += 1
+    return candidate
+
+
+def merge_models(
+    models: Iterable[cobra.Model],
+    *,
+    match_by: str = "name",
+    track_origin: bool = True,
+) -> cobra.Model:
+    """Merge models into a single new model.
+
+    Parameters
+    ----------
+    models
+        The models to merge (two or more). A single model is returned as a copy.
+    match_by
+        How metabolites are unified across models: ``"name"`` (default) treats
+        metabolites with the same *name and compartment* as identical (IDs
+        ignored); ``"id"`` matches by metabolite ID. In ``"name"`` mode a
+        metabolite without a name is matched by ID instead, so unnamed
+        metabolites are never collapsed into one another.
+    track_origin
+        If True (default), record the source model's ``id`` in each reaction's,
+        metabolite's, and gene's ``notes['origin']``.
+
+    Returns
+    -------
+    cobra.Model
+        A new merged model (``id="MERGED"``). Reactions are **not** de-duplicated
+        — matching RAVEN, every reaction from every model is kept, with ID
+        collisions renamed ``id_<sourceid>``.
+
+    Raises
+    ------
+    ValueError
+        If ``models`` is empty or ``match_by`` is not ``"name"`` or ``"id"``.
+
+    Warns
+    -----
+    UserWarning
+        When two metabolites are unified but disagree on formula or charge;
+        the first-seen metabolite is kept.
+    """
+    models = list(models)
+    if not models:
+        raise ValueError("merge_models requires at least one model.")
+    if match_by not in ("name", "id"):
+        raise ValueError(f"match_by must be 'name' or 'id', got {match_by!r}")
+    if len(models) == 1:
+        return models[0].copy()
+
+    merged = Model("MERGED")
+    comp_names: dict[str, str] = {}
+    met_lookup: dict = {}  # name/comp or id key -> merged Metabolite
+
+    def met_key(met: Metabolite):
+        # An empty name carries no identity: keying on ("", compartment) would
+        # fuse every unnamed metabolite of a compartment into one. Fall back
+        # to the id (a str, so it cannot clash with the tuple keys).
+        if match_by == "name" and met.name:
+            return (met.name, met.compartment)
+        return met.id
+
+    def ensure_metabolite(src: Metabolite, origin: str) -> Metabolite:
+        key = met_key(src)
+        if key in met_lookup:
+            existing = met_lookup[key]
+            # Two source models can map to the same name[comp] (or id) with
+            # different formula/charge; silently picking the first-seen has
+            # quietly corrupted mass balance in the past. Warn so the caller
+            # sees the conflict.
+            if src.formula and existing.formula and src.formula != existing.formula:
+                warnings.warn(
+                    f"merge_models: metabolite {existing.id!r} (from earlier model) "
+                    f"and {src.id!r} (from {origin!r}) share key {key!r} but "
+                    f"have different formulas ({existing.formula!r} vs {src.formula!r}); "
+                    "keeping the first.",
+                    stacklevel=3,
+                )
+            if (
+                existing.charge is not None
+                and src.charge is not None
+                and existing.charge != src.charge
+            ):
+                warnings.warn(
+                    f"merge_models: metabolite {existing.id!r} (from earlier model) "
+                    f"and {src.id!r} (from {origin!r}) share key {key!r} but "
+                    f"have different charges ({existing.charge} vs {src.charge}); "
+                    "keeping the first.",
+                    stacklevel=3,
+                )
+            return existing
+        new_id = _unique_id(merged.metabolites, src.id, origin)
+        new_met = Metabolite(
+            new_id, name=src.name, compartment=src.compartment,
+            formula=src.formula, charge=src.charge,
+        )
+        new_met.annotation = copy.deepcopy(src.annotation)
+        new_met.notes = copy.deepcopy(src.notes)
+        if track_origin:
+            new_met.notes.setdefault("origin", origin)
+        merged.add_metabolites([new_met])
+        met_lookup[key] = new_met
+        return new_met
+
+    for model in models:
+        origin = model.id or "model"
+        comp_names.update(model.compartments)
+        genes_before = {g.id for g in merged.genes}
+
+        for rxn in model.reactions:
+            new_id = _unique_id(merged.reactions, rxn.id, origin)
+            new_rxn = Reaction(new_id, name=rxn.name)
+            new_rxn.bounds = rxn.bounds
+            new_rxn.subsystem = rxn.subsystem
+            merged.add_reactions([new_rxn])
+            # Resolve metabolites in an explicit loop: ensure_metabolite is
+            # then always called directly from merge_models, so stacklevel=3
+            # in its warnings points at merge_models' caller on every Python
+            # version (comprehensions add a frame before 3.12 but not after).
+            stoich: dict[Metabolite, float] = {}
+            for met, coef in rxn.metabolites.items():
+                target = ensure_metabolite(met, origin)
+                # Sum instead of overwrite: two metabolites of one reaction
+                # can be unified into the same merged metabolite.
+                stoich[target] = stoich.get(target, 0.0) + coef
+            new_rxn.add_metabolites({m: c for m, c in stoich.items() if c != 0.0})
+            if rxn.gene_reaction_rule:
+                new_rxn.gene_reaction_rule = rxn.gene_reaction_rule
+            new_rxn.annotation = copy.deepcopy(rxn.annotation)
+            new_rxn.notes = copy.deepcopy(rxn.notes)
+            if track_origin:
+                new_rxn.notes.setdefault("origin", origin)
+
+        if track_origin:
+            # Genes are created implicitly by the GPR assignments above; tag
+            # the ones that first appeared with this source model.
+            for gene in merged.genes:
+                if gene.id not in genes_before:
+                    gene.notes.setdefault("origin", origin)
+
+    merged.compartments = comp_names
+    return merged
diff --git a/src/raven_python/manipulation/parameters.py b/src/raven_python/manipulation/parameters.py
new file mode 100644
index 0000000..f349804
--- /dev/null
+++ b/src/raven_python/manipulation/parameters.py
@@ -0,0 +1,78 @@
+"""Set reaction bounds to a sign-aware ±% variance band around measured values.
+
+Cobra has no idiom for the *variance band* case (e.g. "5 ± 20 %"); the other common
+bound-setting cases are cobra one-liners:
+
+* fixed lb / ub  → ``reaction.lower_bound`` / ``upper_bound`` / ``reaction.bounds``
+* equality       → ``reaction.bounds = (v, v)``
+* objective      → ``model.objective = {reaction: coeff}``
+* unconstrained  → ``reaction.bounds = cobra.Configuration().bounds``
+"""
+from __future__ import annotations
+
+from collections.abc import Iterable, Sequence
+
+import cobra
+from cobra import Reaction
+
+Number = int | float
+
+
+def _resolve(model: cobra.Model, reactions) -> list[Reaction]:
+    """Turn a reaction ID/object, or an iterable of them, into Reaction objects."""
+    items = [reactions] if isinstance(reactions, (str, Reaction)) else reactions
+    resolved: list[Reaction] = []
+    for item in items:
+        if isinstance(item, Reaction):
+            resolved.append(item)  # objects pass through without a model lookup
+            continue
+        if item not in model.reactions:
+            raise ValueError(f"Reaction {item!r} not found in the model.")
+        resolved.append(model.reactions.get_by_id(item))
+    return resolved
+
+
+def _broadcast(value, n: int) -> list[float]:
+    """Return ``n`` floats: a real scalar repeated, or one float per given value."""
+    from numbers import Real  # ABC also matches Fraction and numpy int/float scalars
+    if isinstance(value, Real):
+        return [float(value)] * n
+    vals = [float(v) for v in value]
+    if len(vals) != n:
+        raise ValueError(f"Expected 1 or {n} values to match the reactions, got {len(vals)}.")
+    return vals
+
+
+def set_variance_bounds(
+    model: cobra.Model,
+    reactions: str | Reaction | Iterable,
+    values: Number | Sequence[Number],
+    percent: Number,
+) -> list[Reaction]:
+    """Constrain reactions to a ``±percent/2`` band around measured values.
+
+    Each band runs between ``v * (1 - percent/200)`` and ``v * (1 + percent/200)``;
+    the smaller end is the lower bound whatever the signs of ``v`` and ``percent``.
+
+    Parameters
+    ----------
+    model
+        Model in which reaction IDs are looked up.
+    reactions
+        Reaction IDs or objects.
+    values
+        Measured value per reaction; a scalar is broadcast to all reactions.
+    percent
+        Total band width as a percentage (half above, half below the value).
+
+    Returns
+    -------
+    list of cobra.Reaction
+        The reactions affected.
+    """
+    rxns = _resolve(model, reactions)
+    half = percent / 200.0
+    for rxn, v in zip(rxns, _broadcast(values, len(rxns)), strict=True):
+        # Sorting keeps lb <= ub for negative values and negative percents alike.
+        rxn.bounds = tuple(sorted((v * (1 - half), v * (1 + half))))
+    return rxns
diff --git a/src/raven_python/manipulation/remove.py b/src/raven_python/manipulation/remove.py
new file mode 100644
index 0000000..492de36
--- /dev/null
+++ b/src/raven_python/manipulation/remove.py
@@ -0,0 +1,120 @@
+"""Remove metabolites or genes from a model.
+
+For removing *reactions*, use cobra directly:
+``cobra.Model.remove_reactions(reactions, remove_orphans=...)``.
+
+The two functions here delegate the core to cobra and add the cobra-absent behaviour:
+
+* ``remove_metabolites`` — cobra matches metabolites by ID; RAVEN's ``isNames``
+  deletes a metabolite in **every compartment at once** by name. That name
+  resolution is the *sole* reason this wrapper exists (see the note on it).
+* ``remove_genes`` — cobra's ``cobra.manipulation.remove_genes`` already rewrites
+  GPRs through the boolean AST (removing one gene of ``A and B`` empties the
+  rule, of ``A or B`` keeps the other) — exactly RAVEN's intent, without its
+  ``eval``. The gap is RAVEN's default of **constraining** flux-blocked reactions
+  to zero instead of deleting them; exposed as ``blocked_reactions``.
+"""
+from __future__ import annotations
+
+from collections.abc import Iterable
+
+import cobra
+from cobra import Gene, Metabolite
+from cobra.manipulation import remove_genes as _cobra_remove_genes
+
+
+def _as_list(obj) -> list:
+    """Wrap a lone ID/Metabolite/Gene in a list; copy any other iterable into one."""
+    single = isinstance(obj, (str, Metabolite, Gene))
+    return [obj] if single else list(obj)
+
+
+def remove_metabolites(
+    model: cobra.Model,
+    metabolites: str | Metabolite | Iterable,
+    *,
+    by_name: bool = False,
+    destructive: bool = False,
+) -> None:
+    """Delete metabolites by ID/object, or by name across every compartment.
+
+    Parameters
+    ----------
+    by_name
+        True: ``metabolites`` are *names*, and each metabolite whose name matches
+        is removed whatever its compartment (RAVEN ``isNames``). False (default):
+        they are IDs/objects, resolved via cobra.
+    destructive
+        Forwarded to cobra: if True, reactions the metabolite takes part in are
+        removed as well.
+
+    Notes
+    -----
+    With ``by_name=False`` this is plain ``model.remove_metabolites``.
+    """
+    items = _as_list(metabolites)
+    if by_name:
+        names = set(items)
+        doomed = [met for met in model.metabolites if met.name in names]
+    else:
+        doomed = model.metabolites.get_by_any(items)
+    if doomed:
+        model.remove_metabolites(doomed, destructive=destructive)
+
+
+def remove_genes(
+    model: cobra.Model,
+    genes: str | Gene | Iterable,
+    *,
+    blocked_reactions: str = "remove",
+    remove_orphans: bool = False,
+) -> list[str]:
+    """Delete genes, then apply a policy to reactions left without any enzyme.
+
+    cobra deletes the genes and rewrites the GPRs (correct AND/OR semantics); genes
+    absent from the model are ignored. Reactions whose GPR ends up empty are then
+    treated according to ``blocked_reactions``:
+
+    * ``"remove"`` — delete them (cobra's default).
+    * ``"constrain"`` — keep them, with bounds set to ``(0, 0)``.
+    * ``"keep"`` — keep them untouched apart from the now-empty GPR.
+
+    ``remove_orphans`` is forwarded to cobra's reaction removal, so it only has an
+    effect with ``blocked_reactions="remove"``.
+
+    Returns
+    -------
+    list of str
+        Sorted IDs of the reactions that became flux-blocked (had a GPR, now empty).
+    """
+    if blocked_reactions not in ("remove", "constrain", "keep"):
+        raise ValueError(
+            f"blocked_reactions must be 'remove', 'constrain', or 'keep', "
+            f"got {blocked_reactions!r}"
+        )
+
+    # Keep only the IDs the model actually holds (RAVEN filters likewise).
+    gene_ids = [g.id if isinstance(g, Gene) else g for g in _as_list(genes)]
+    present = [gid for gid in gene_ids if gid in model.genes]
+    if not present:
+        return []
+
+    # IDs of the reactions linked to those genes that carry a GPR before removal.
+    had_gpr = {
+        r.id for gid in present for r in model.genes.get_by_id(gid).reactions
+        if r.gene_reaction_rule
+    }
+
+    # cobra rewrites the GPRs (AST) and drops the gene objects; reactions stay.
+    _cobra_remove_genes(model, present, remove_reactions=False)
+
+    blocked = [
+        rid for rid in had_gpr if not model.reactions.get_by_id(rid).gene_reaction_rule
+    ]
+    if blocked_reactions == "constrain":
+        for rid in blocked:
+            model.reactions.get_by_id(rid).bounds = (0, 0)
+    elif blocked_reactions == "remove":
+        model.remove_reactions(blocked, remove_orphans=remove_orphans)
+
+    return sorted(blocked)
diff --git a/src/raven_python/manipulation/simplify.py b/src/raven_python/manipulation/simplify.py
new file mode 100644
index 0000000..2deaccd
--- /dev/null
+++ b/src/raven_python/manipulation/simplify.py
@@ -0,0 +1,229 @@
+"""Reduce a model by removing/merging reactions that cannot carry flux.
+
+Four reduction modes that cobra does not cover out of the box:
+``remove_dead_end_reactions`` (reactions whose substrates have no producer),
+``remove_duplicate_reactions``, ``constrain_reversible_reactions`` (tighten bounds
+via FVA), and ``group_linear_reactions`` (lossy fold of unit-stoichiometry chains
+into one reaction; drops gene rules).
+
+Cobra-covered modes that you'd reach for separately:
+
+* No-flux removal → ``cobra.flux_analysis.find_blocked_reactions``.
+* Zero-interval removal → filter reactions with ``bounds == (0, 0)`` then prune.
+"""
+from __future__ import annotations
+
+import math
+from collections.abc import Iterable
+
+import cobra
+from cobra.flux_analysis import flux_variability_analysis
+
+from raven_python.manipulation.irreversible import convert_to_irreversible
+
+
+def _prune_orphan_metabolites(model: cobra.Model) -> list[str]:
+    unused = [met for met in model.metabolites if len(met.reactions) == 0]  # no users left
+    if unused:  # nothing to drop otherwise, so skip the cobra call
+        model.remove_metabolites(unused)
+    return [met.id for met in unused]
+
+
+def _can_produce_and_consume(met) -> tuple[bool, bool]:
+    """Return ``(producible, consumable)`` for ``met`` given each reaction's bounds."""
+    producible = consumable = False
+    for rxn in met.reactions:
+        coef = rxn.get_coefficient(met)
+        if not (coef > 0 or coef < 0):  # zero coefficient: contributes to neither
+            continue
+        forward, backward = rxn.upper_bound > 0, rxn.lower_bound < 0
+        # Forward flux makes a product (coef > 0) and uses up a substrate (coef < 0).
+        producible = producible or (forward if coef > 0 else backward)
+        consumable = consumable or (backward if coef > 0 else forward)
+    return producible, consumable
+
+
+def remove_dead_end_reactions(
+    model: cobra.Model, *, reserved: Iterable[str] | None = None
+) -> tuple[list[str], list[str]]:
+    """Iteratively remove dead-end reactions and the metabolites they orphan.
+
+    A metabolite is a dead end if at most one reaction uses it, or if (given the
+    reaction bounds) it can only be produced or only be consumed; it cannot carry
+    steady-state flux, so every non-``reserved`` reaction touching it is removed.
+    Repeats until nothing more can be removed.
+
+    Returns ``(removed_reaction_ids, removed_metabolite_ids)``, reactions sorted by ID per round.
+    """
+    reserved = set(reserved or [])
+    removed_rxns: list[str] = []
+    removed_mets: list[str] = []
+    while True:
+        removed_mets += _prune_orphan_metabolites(model)
+        dead = [
+            m for m in model.metabolites
+            if len(m.reactions) <= 1 or not all(_can_produce_and_consume(m))
+        ]
+        # Sort by ID: a bare set of Reaction objects iterates in hash order, which
+        # would let the order of the returned IDs differ from run to run.
+        to_delete = sorted(
+            {r for m in dead for r in m.reactions if r.id not in reserved},
+            key=lambda r: r.id,
+        )
+        if not to_delete:
+            break
+        removed_rxns += [r.id for r in to_delete]
+        model.remove_reactions(to_delete)
+    return removed_rxns, removed_mets
+
+
+def _signature(rxn):
+    """Hashable key: stoichiometry (by metabolite ID), bounds, objective coefficient."""
+    mets = frozenset((m.id, c) for m, c in rxn.metabolites.items())
+    return (mets, rxn.lower_bound, rxn.upper_bound, rxn.objective_coefficient)
+
+
+def remove_duplicate_reactions(
+    model: cobra.Model, *, reserved: Iterable[str] | None = None
+) -> list[str]:
+    """Remove all-but-one of each set of duplicate reactions.
+
+    Reactions are duplicates when stoichiometry, bounds, and objective coefficient
+    are identical. ``reserved`` reactions are never removed: a set containing any
+    keeps only those, otherwise its last member survives. Returns the removed IDs.
+    """
+    reserved = set(reserved or [])
+    groups: dict = {}
+    for rxn in model.reactions:
+        groups.setdefault(_signature(rxn), []).append(rxn)
+
+    removed: list[str] = []
+    for rxns in groups.values():
+        # A reserved member already represents its set; otherwise spare the last one.
+        spared = [r for r in rxns if r.id in reserved] or rxns[-1:]
+        to_remove = [r for r in rxns if not any(r is s for s in spared)]
+        if to_remove:
+            removed += [r.id for r in to_remove]
+            model.remove_reactions(to_remove)
+    return removed
+
+
+def constrain_reversible_reactions(
+    model: cobra.Model, *, eps: float = 1e-9
+) -> list[str]:
+    """Constrain reversible reactions that can only carry flux one way.
+
+    Runs FVA (``fraction_of_optimum=0``) on every reversible reaction. A range that
+    exceeds ``eps`` but never drops below ``-eps`` gets lower bound 0; one that drops
+    below ``-eps`` but never exceeds ``eps`` is flipped to a forward reaction
+    (stoichiometry, bounds, and objective negated). Blocked and unbounded ranges are
+    left alone. Raises ``RuntimeError`` if FVA fails or yields NaN. Returns changed IDs.
+    """
+    revs = [r for r in model.reactions if r.lower_bound < 0 < r.upper_bound]
+    if not revs:
+        return []
+    # Infeasible models surface either as an exception (e.g. OptimizationError)
+    # or as NaN-filled ranges, depending on the solver backend. Both become one
+    # clear error; NaN would otherwise fail every comparison below unnoticed.
+    try:
+        fva = flux_variability_analysis(
+            model, reaction_list=revs, fraction_of_optimum=0.0
+        )
+    except Exception as exc:  # noqa: BLE001 - solver-family agnostic
+        raise RuntimeError(
+            "constrain_reversible_reactions: FVA failed — the model is likely "
+            "infeasible at fraction_of_optimum=0. Fix the infeasibility first "
+            "(often a missing exchange or an over-constrained essential). "
+            f"({exc})"
+        ) from exc
+    if fva[["minimum", "maximum"]].isna().any().any():
+        raise RuntimeError(
+            "constrain_reversible_reactions: FVA returned NaN ranges — the "
+            "model is infeasible at fraction_of_optimum=0. Fix the infeasibility "
+            "first (often a missing exchange or an over-constrained essential)."
+        )
+
+    changed: list[str] = []
+    for rxn in revs:
+        lo = fva.at[rxn.id, "minimum"]
+        hi = fva.at[rxn.id, "maximum"]
+        # An infinite end means an unbounded range: leave the reaction untouched
+        # rather than trying to classify it.
+        if math.isinf(lo) or math.isinf(hi):
+            continue
+        # Classify by sign, not by |x| < eps alone: a range such as [2, 10] has
+        # two nonzero ends yet only ever carries forward flux.
+        can_reverse, can_forward = lo < -eps, hi > eps
+        if can_reverse == can_forward:  # blocked (neither) or truly reversible (both)
+            continue
+        if can_reverse:  # only reverse flux → flip to a forward reaction
+            old_lb = rxn.lower_bound
+            rxn.add_metabolites({m: -2 * c for m, c in rxn.metabolites.items()})
+            rxn.bounds = (0.0, -old_lb)
+            rxn.objective_coefficient = -rxn.objective_coefficient
+        else:  # only forward flux
+            rxn.lower_bound = 0.0
+        changed.append(rxn.id)
+    return changed
+
+
+def group_linear_reactions(
+    model: cobra.Model, *, reserved: Iterable[str] | None = None
+) -> None:
+    """Merge linear (single-producer, single-consumer) reaction chains.
+
+    **Lossy**: gene-reaction associations are discarded (RAVEN does the same),
+    since merged reactions have no meaningful combined GPR. The model is first
+    made irreversible; then every metabolite used by exactly two non-``reserved``
+    reactions — one producing, one consuming it — is eliminated by folding the
+    second reaction (in ID order), scaled so the metabolite cancels, into the
+    first, which keeps its ID and takes the tighter bounds and combined objective.
+    Ties are broken by ID, so the outcome does not depend on set hash order.
+    Mutates ``model`` in place and returns ``None``.
+    """
+    reserved = set(reserved or [])
+
+    # Lossy: drop all gene information.
+    for rxn in model.reactions:
+        rxn.gene_reaction_rule = ""
+    for gene in list(model.genes):
+        model.genes.remove(gene)
+
+    convert_to_irreversible(model)
+
+    # Worklist of metabolites to (re)consider for merging. A merge can expose new
+    # linear chains among its neighbours, so only the metabolites it touched are
+    # re-enqueued instead of rescanning the whole model after every merge.
+    pending: list = list(model.metabolites)
+    while pending:
+        met = pending.pop()
+        if met not in model.metabolites:  # removed in a previous merge
+            continue
+        # Sort by ID so the surviving reaction does not depend on set hash order.
+        rxns = sorted(met.reactions, key=lambda r: r.id)
+        if len(rxns) != 2 or any(r.id in reserved for r in rxns):
+            continue
+        r1, r2 = rxns
+        c1, c2 = r1.get_coefficient(met), r2.get_coefficient(met)
+        if (c1 > 0) == (c2 > 0):  # need one producer and one consumer
+            continue
+        ratio = abs(c1 / c2)
+        new_lb = max(r1.lower_bound, r2.lower_bound / ratio)
+        new_ub = min(r1.upper_bound, r2.upper_bound / ratio)
+        new_obj = r1.objective_coefficient + r2.objective_coefficient * ratio
+        # Every metabolite of either reaction may now sit in a linear chain.
+        touched = set(r1.metabolites) | set(r2.metabolites)
+        # Merge r2*ratio into r1; the shared metabolite cancels and is dropped.
+        r1.add_metabolites({m: c * ratio for m, c in r2.metabolites.items()})
+        model.remove_reactions([r2])
+        r1.bounds = (new_lb, new_ub)
+        r1.objective_coefficient = new_obj
+        # ``touched`` is a set (no duplicates); sorting fixes the revisit order.
+        pending.extend(
+            sorted((m for m in touched if m in model.metabolites), key=lambda m: m.id)
+        )
+    # One terminal cleanup pass (cheap; only what remains).
+    empty = [r for r in model.reactions if not r.metabolites]
+    if empty:
+        model.remove_reactions(empty)
+    _prune_orphan_metabolites(model)
diff --git a/src/raven_python/manipulation/transfer.py b/src/raven_python/manipulation/transfer.py
new file mode 100644
index 0000000..b867f02
--- /dev/null
+++ b/src/raven_python/manipulation/transfer.py
@@ -0,0 +1,144 @@
+"""Copy reactions (with their metabolites and genes) from another model.
+
+cobra's ``Model.merge`` / ``add_reactions`` match metabolites strictly by id. This
+transfers a chosen set of reactions from a *source* model into a draft, matching
+metabolites by **name[compartment]** instead — so a compound present in both models
+under different ids is reused rather than duplicated, and only genuinely new
+metabolites are created (copying the source's id, formula,
+charge, and annotation). New genes are auto-created by cobra when the GPR is set.
+This is the post-``getModelFromHomology`` "copy a few more reactions across"
+workflow.
+"""
+from __future__ import annotations
+
+import copy
+from collections.abc import Iterable
+
+import cobra
+from cobra import Metabolite, Reaction
+
+from raven_python.manipulation.add import _new_met_id
+
+
+def _name_comp(met: Metabolite) -> str:
+    return f"{met.name}[{met.compartment}]"  # matching key, formatted "name[compartment]"
+
+
+def add_reactions_from_model(
+    model: cobra.Model,
+    source_model: cobra.Model,
+    reactions: str | Iterable[str],
+    *,
+    genes: bool | str | Iterable[str] = False,
+    note: str | None = "Added via add_reactions_from_model()",
+    confidence: int | None = None,
+) -> list[Reaction]:
+    """Copy reactions from ``source_model`` into ``model``.
+
+    Raises ``ValueError`` for unknown IDs, nothing left to add, or a wrong-length ``genes`` list.
+
+    Parameters
+    ----------
+    model
+        Draft model to copy into (mutated in place).
+    source_model
+        Model to copy reactions from.
+    reactions
+        Reaction ID(s) in ``source_model``; those already in ``model`` (by ID) are skipped.
+    genes
+        ``False`` (default): no GPRs. ``True``: copy each reaction's GPR from the
+        source. A string: that GPR for every added reaction. A list: one GPR per
+        reaction actually added. New genes are created automatically.
+    note
+        Stored in each added reaction's ``notes['note']`` (set ``None`` to skip).
+    confidence
+        If given, stored in each added reaction's ``notes['confidence_score']``.
+
+    Returns
+    -------
+    list of cobra.Reaction
+        The reactions added, in input order.
+    """
+    rxn_ids = [reactions] if isinstance(reactions, str) else list(reactions)
+    missing = [r for r in rxn_ids if r not in source_model.reactions]
+    if missing:
+        raise ValueError(f"Reactions not found in the source model: {missing}")
+
+    new_ids = [r for r in rxn_ids if r not in model.reactions]
+    if not new_ids:
+        raise ValueError("All reactions are already in the model.")
+    source_rxns = [source_model.reactions.get_by_id(r) for r in new_ids]
+
+    if genes is False:
+        rules = [""] * len(source_rxns)
+    elif genes is True:
+        rules = [r.gene_reaction_rule for r in source_rxns]
+    elif isinstance(genes, str):
+        rules = [genes] * len(source_rxns)
+    else:
+        rules = list(genes)
+        if len(rules) != len(source_rxns):
+            raise ValueError(
+                f"genes list has {len(rules)} rules but {len(source_rxns)} "
+                "reactions are being added."
+            )
+
+    # Match metabolites by name[comp]; create only the genuinely new ones.
+    draft_by_name = {_name_comp(m): m for m in model.metabolites}
+    new_mets: list[Metabolite] = []
+    pending: set[str] = set()
+    # IDs minted in this batch: same-id source mets with different name[comp] must not collide.
+    pending_ids: set[str] = set()
+    for srx in source_rxns:
+        for met in srx.metabolites:
+            key = _name_comp(met)
+            if key in draft_by_name or key in pending:
+                continue
+            pending.add(key)
+            if met.id not in model.metabolites and met.id not in pending_ids:
+                new_id = met.id
+            else:
+                # _new_met_id only knows the model; loop past in-batch hits too.
+                new_id = _new_met_id(model, "m")
+                while new_id in pending_ids:
+                    n = int(new_id[1:]) + 1  # assumes "m" + digits from _new_met_id — TODO confirm
+                    new_id = f"m{n}"
+                    while new_id in model.metabolites:
+                        n += 1
+                        new_id = f"m{n}"
+            pending_ids.add(new_id)
+            new_met = Metabolite(
+                new_id,
+                name=met.name,
+                compartment=met.compartment,
+                formula=met.formula,
+                charge=met.charge,
+            )
+            new_met.annotation = copy.deepcopy(met.annotation)
+            new_met.notes = copy.deepcopy(met.notes)
+            new_mets.append(new_met)
+            draft_by_name[key] = new_met
+    if new_mets:
+        model.add_metabolites(new_mets)
+
+    added: list[Reaction] = []
+    for srx, rule in zip(source_rxns, rules, strict=True):
+        rxn = Reaction(srx.id, name=srx.name)
+        rxn.bounds = srx.bounds
+        rxn.subsystem = srx.subsystem
+        model.add_reactions([rxn])
+        rxn.add_metabolites(
+            {draft_by_name[_name_comp(met)]: coef for met, coef in srx.metabolites.items()}
+        )
+        if rule:
+            rxn.gene_reaction_rule = rule
+        rxn.annotation = copy.deepcopy(srx.annotation)
+        notes = copy.deepcopy(srx.notes)
+        if note is not None:
+            notes["note"] = note
+        if confidence is not None:
+            notes["confidence_score"] = confidence
+        rxn.notes = notes
+        added.append(rxn)
+
+    return added
diff --git a/src/raven_python/manipulation/transport.py b/src/raven_python/manipulation/transport.py
new file mode 100644
index 0000000..d0c1bf1
--- /dev/null
+++ b/src/raven_python/manipulation/transport.py
@@ -0,0 +1,157 @@
+"""Add transport reactions between compartments.
+
+cobra has no transport-reaction primitive. For each metabolite this matches the
+species by *name* across compartments (the source in ``from_compartment`` and its
+same-named twin in each target compartment), optionally creating the target
+metabolite, and
+builds a ``-1 from / +1 to`` reaction with a sequential ``tr_0001`` ID.
+"""
+from __future__ import annotations
+
+import re
+import warnings
+from collections.abc import Iterable
+
+import cobra
+from cobra import Metabolite, Reaction
+
+from raven_python.manipulation.add import _new_met_id
+
+
+def _index_by_name(mets: Iterable[Metabolite], compartment: str) -> dict[str, Metabolite]:
+    """Map each metabolite name to the first metabolite carrying it.
+
+    cobra allows several same-named metabolites in one compartment; a warning
+    lists such duplicates and the one chosen. ``compartment`` only labels it.
+    """
+    first: dict[str, Metabolite] = {}
+    ids_by_name: dict[str, list[str]] = {}
+    for met in mets:
+        first.setdefault(met.name, met)
+        ids_by_name.setdefault(met.name, []).append(met.id)
+    for name, ids in ids_by_name.items():
+        if len(ids) > 1:
+            warnings.warn(
+                f"Multiple metabolites named {name!r} in compartment {compartment!r} "
+                f"({ids}); using {ids[0]!r} for transport.",
+                stacklevel=3,
+            )
+    return first
+
+
+def _transport_id_factory(model: cobra.Model, prefix: str):
+    """Return a callable that hands out unused ``<prefix>NNNN`` reaction IDs in sequence."""
+    pattern = re.compile(rf"^{re.escape(prefix)}(\d+)$")
+    hits = filter(None, (pattern.match(rxn.id) for rxn in model.reactions))
+    counter = max((int(hit.group(1)) for hit in hits), default=0) + 1
+
+    def next_id() -> str:
+        nonlocal counter
+        while (rid := f"{prefix}{counter:04d}") in model.reactions:
+            counter += 1
+        counter += 1
+        return rid
+
+    return next_id
+
+
+def add_transport_reactions(
+    model: cobra.Model,
+    from_compartment: str,
+    to_compartments: str | Iterable[str],
+    metabolite_names: str | Iterable[str] | None = None,
+    *,
+    reversible: bool = True,
+    only_to_existing: bool = True,
+    id_prefix: str = "tr_",
+) -> list[Reaction]:
+    """Add transport reactions from one compartment to one or more others.
+
+    Parameters
+    ----------
+    model
+        Model to add the reactions (and any new metabolites) to, in place.
+    from_compartment
+        Source compartment id.
+    to_compartments
+        Target compartment id(s); none may equal ``from_compartment``.
+    metabolite_names
+        Names of metabolites to transport. Default: every metabolite in ``from_compartment``.
+    reversible
+        If True (default), bounds span cobra's configured defaults; otherwise lower bound 0.
+    only_to_existing
+        If True (default), only transport into target compartments that already
+        hold a same-named metabolite. If False, create the missing target
+        metabolite (name/formula/charge/annotation copied from the source).
+    id_prefix
+        Prefix for the sequential reaction IDs (``tr_0001``, ...).
+
+    Returns
+    -------
+    list of cobra.Reaction
+        The transport reactions added, in creation order.
+
+    Raises
+    ------
+    ValueError
+        For an unknown compartment or metabolite name, or a target equal to the source.
+    """
+    import copy  # local import: not among this module's top-level imports
+
+    # `model.compartments` lists only compartments holding metabolites; add the empty ones too.
+    known = set(model.compartments) | set(model._compartments)
+    if from_compartment not in known:
+        raise ValueError(f"Compartment {from_compartment!r} is not in the model.")
+    if isinstance(to_compartments, str):
+        to_compartments = [to_compartments]
+    to_compartments = list(to_compartments)
+    for comp in to_compartments:
+        if comp not in known:
+            raise ValueError(f"Compartment {comp!r} is not in the model.")
+        if comp == from_compartment:  # {src: -1, dst: 1} would collapse to {src: 1}
+            raise ValueError(f"Cannot transport from compartment {comp!r} to itself.")
+
+    in_source = (m for m in model.metabolites if m.compartment == from_compartment)
+    source = _index_by_name(in_source, from_compartment)
+    if metabolite_names is None:
+        names = list(source)
+    else:
+        names = [metabolite_names] if isinstance(metabolite_names, str) else list(metabolite_names)
+        missing = [n for n in names if n not in source]
+        if missing:
+            raise ValueError(
+                f"Metabolites not found in compartment {from_compartment!r}: {missing}"
+            )
+
+    cfg = cobra.Configuration()
+    bounds = (cfg.lower_bound, cfg.upper_bound) if reversible else (0.0, cfg.upper_bound)
+    from_name = model.compartments.get(from_compartment) or from_compartment
+    next_id = _transport_id_factory(model, id_prefix)
+
+    added: list[Reaction] = []
+    for to_comp in to_compartments:
+        to_name = model.compartments.get(to_comp) or to_comp
+        in_target = (m for m in model.metabolites if m.compartment == to_comp)
+        targets = _index_by_name(in_target, to_comp)
+        for name in names:
+            src = source[name]
+            dst = targets.get(name)
+            if dst is None:
+                if only_to_existing:
+                    continue
+                dst = Metabolite(
+                    _new_met_id(model, "m"), name=name, compartment=to_comp,
+                    formula=src.formula, charge=src.charge,
+                )
+                # Deep copy so list-valued annotations are not shared with the source.
+                dst.annotation = copy.deepcopy(src.annotation)
+                model.add_metabolites([dst])
+                targets[name] = dst
+
+            rxn = Reaction(next_id(), name=f"{name} transport, {from_name}-{to_name}")
+            rxn.bounds = bounds
+            model.add_reactions([rxn])
+            rxn.add_metabolites({src: -1, dst: 1})
+            added.append(rxn)
+
+    return added
diff --git a/src/raven_python/omics/__init__.py b/src/raven_python/omics/__init__.py
new file mode 100644
index 0000000..166b020
--- /dev/null
+++ b/src/raven_python/omics/__init__.py
@@ -0,0 +1,23 @@
+"""Omics integration — HPA proteomics + RNA-seq parsing and gene-scoring adapters.
+
+Entry point for tissue-specific (f)tINIT runs. See :mod:`raven_python.omics.hpa`.
+"""
+from raven_python.omics.hpa import (
+    HPA_LEVEL_SCORES,
+    HPAData,
+    HPARnaData,
+    hpa_gene_scores,
+    parse_hpa,
+    parse_hpa_rna,
+    rna_gene_scores,
+)
+
+__all__ = [
+    "HPA_LEVEL_SCORES",
+    "HPAData",
+    "HPARnaData",
+    "hpa_gene_scores",
+    "parse_hpa",
+    "parse_hpa_rna",
+    "rna_gene_scores",
+]
diff --git a/src/raven_python/omics/hpa.py b/src/raven_python/omics/hpa.py
new file mode 100644
index 0000000..59caf93
--- /dev/null
+++ b/src/raven_python/omics/hpa.py
@@ -0,0 +1,190 @@
+"""Human Protein Atlas (HPA) parsers + gene-scoring adapters.
+
+HPA publishes two datasets per release: a **proteomics** table (``normal_tissue.tsv``)
+with per-tissue / per-cell-type *categorical* expression levels (High/Medium/Low/Not
+detected) plus reliability flags, and an **RNA-seq** table (``rna_tissue_consensus.tsv``
+/ ``rna_tissue_gtex.tsv``) with per-tissue *TPM* values. Both are returned as tidy
+:class:`pandas.DataFrame`\\ s; the scoring adapters delegate the GPR walk to
+:func:`raven_python.init.score.score_reactions_from_genes` so there is one source of truth
+for reaction scoring.
+
+Pipeline (typical (f)tINIT entry):
+
+.. code-block:: python
+
+    hpa = parse_hpa("normal_tissue.tsv")
+    gene_scores = hpa_gene_scores(hpa, tissue="liver", celltype="hepatocytes")
+    rxn_scores  = score_reactions_from_genes(model, gene_scores)
+    # → ftinit(prep, rxn_scores, gene_scores=gene_scores, ...)
+
+or for RNA-seq:
+
+.. code-block:: python
+
+    rna = parse_hpa_rna("rna_tissue_consensus.tsv")
+    gene_scores = rna_gene_scores(rna, tissue="liver")   # ref = per-gene cross-tissue mean
+    rxn_scores  = score_reactions_from_genes(model, gene_scores)
+"""
+from __future__ import annotations
+
+from collections.abc import Mapping
+from dataclasses import dataclass
+from pathlib import Path
+
+import pandas as pd
+
+from raven_python.init.score import gene_scores_from_expression
+
+# RAVEN's hpaLevelScores defaults (scoreModel.m). HPA reports either antibody-staining
+# levels (Strong/Moderate/Weak/Negative) or "APE" classes (High/Medium/Low/Not detected /
+# Ascending/Descending/...); the four common categories are mapped here. Unknown levels
+# (e.g. "Mixed", "N/A") fall through to NaN and are dropped during scoring.
+# Both naming schemes share the same four numeric scores (20 / 15 / 10 / -8).
+HPA_LEVEL_SCORES: dict[str, float] = {
+    "High": 20.0, "Medium": 15.0, "Low": 10.0, "Not detected": -8.0,
+    "Strong": 20.0, "Moderate": 15.0, "Weak": 10.0, "Negative": -8.0,
+}
+
+# Column headers that ``parse_hpa`` requires in a proteomics table.
+_HPA_HEADERS = ("Gene", "Gene name", "Tissue", "Cell type", "Level", "Reliability")
+# Not referenced by any function in this module; ``parse_hpa_rna`` checks its columns inline.
+_HPA_RNA_HEADERS = ("Gene", "Gene name", "Tissue")  # extra TPM columns follow
+
+
+@dataclass
+class HPAData:
+    """Parsed HPA proteomics table, held as a single tidy DataFrame.
+
+    Every row of :attr:`df` describes one (gene, tissue, cell type) combination using
+    the columns ``gene_id``, ``gene_name``, ``tissue``, ``celltype``, ``level`` and
+    ``reliability``. ``level`` keeps HPA's categorical string; turn it into a number
+    with :func:`hpa_gene_scores` (optionally supplying a custom ``level_scores``).
+    """
+
+    df: pd.DataFrame
+
+    def tissues(self) -> list[str]:
+        """Return the distinct tissue names in :attr:`df`, sorted."""
+        distinct = self.df["tissue"].unique()
+        return sorted(distinct)
+
+    def celltypes(self, tissue: str) -> list[str]:
+        """Return the distinct cell types recorded for ``tissue``, sorted."""
+        in_tissue = self.df["tissue"] == tissue
+        distinct = self.df.loc[in_tissue, "celltype"].unique()
+        return sorted(distinct)
+
+
+@dataclass
+class HPARnaData:
+    """Parsed HPA RNA-seq table: one row per (gene, tissue) carrying a TPM value.
+
+    :attr:`df` has the columns ``gene_id``, ``gene_name``, ``tissue`` and ``tpm``.
+    """
+
+    df: pd.DataFrame
+
+    def tissues(self) -> list[str]:
+        """Return the distinct tissue names in :attr:`df`, sorted."""
+        distinct = self.df["tissue"].unique()
+        return sorted(distinct)
+
+    def expression(self, tissue: str) -> dict[str, float]:
+        """Return ``{gene_id: TPM}`` for the rows of ``tissue``.
+
+        The result can be passed straight to
+        :func:`raven_python.init.score.gene_scores_from_expression`.
+        """
+        rows = self.df["tissue"] == tissue
+        gene_ids = self.df.loc[rows, "gene_id"]
+        tpm_values = self.df.loc[rows, "tpm"]
+        return dict(zip(gene_ids, tpm_values, strict=True))
+
+
+def parse_hpa(path: str | Path) -> HPAData:
+    """Read an HPA proteomics dump (``normal_tissue.tsv``; version ≥17 format).
+
+    The delimiter is sniffed by pandas (HPA itself ships tab-separated files) and every
+    cell is kept as a string. The table must provide the columns
+    ``Gene  Gene name  Tissue  Cell type  Level  Reliability``; any other column is
+    discarded.
+
+    Returns an :class:`HPAData` holding one row per (gene, tissue, cell type).
+    Raises ``ValueError`` (via ``_check_headers``) when a required column is missing.
+    """
+    raw = pd.read_csv(path, sep=None, engine="python", dtype=str, na_filter=False)
+    _check_headers(raw, _HPA_HEADERS, path)
+    # HPA header -> tidy column name; the dict order is also the output column order.
+    renames = {
+        "Gene": "gene_id",
+        "Gene name": "gene_name",
+        "Tissue": "tissue",
+        "Cell type": "celltype",
+        "Level": "level",
+        "Reliability": "reliability",
+    }
+    tidy = raw.rename(columns=renames)[list(renames.values())]
+    return HPAData(tidy.reset_index(drop=True))
+
+
+def parse_hpa_rna(path: str | Path) -> HPARnaData:
+    """Parse an HPA RNA-seq dump into tidy (gene, tissue, TPM) rows.
+
+    Two layouts are accepted:
+
+    * **tidy** (canonical ≥v17): ``Gene  Gene name  Tissue  <value>`` with one row per
+      gene × tissue. ``<value>`` is the ``TPM`` column or, when there is no ``TPM``
+      column, ``nTPM`` (the header used by recent ``rna_tissue_consensus.tsv`` files);
+    * **wide** (older): ``Gene  Gene name  TissueA  TissueB  ...`` with one TPM column
+      per tissue, which is melted into the same tidy shape.
+
+    Rows whose expression value cannot be parsed as a number are dropped.
+
+    Raises
+    ------
+    ValueError
+        If ``Gene`` / ``Gene name`` are missing, or if the table has a ``Tissue``
+        column but neither ``TPM`` nor ``nTPM``. Such a table used to be melted as if
+        it were wide, which silently produced rows whose "tissue" was a column header.
+    """
+    df = pd.read_csv(path, sep=None, engine="python", dtype=str, na_filter=False)
+    columns = set(df.columns)
+    if not {"Gene", "Gene name"}.issubset(columns):
+        raise ValueError(f"{path}: expected Gene/Gene name/Tissue/TPM columns "
+                         f"(got {list(df.columns)})")
+    if "Tissue" in columns:
+        # Tidy layout: ``TPM`` keeps priority so files that already parsed are unaffected.
+        value_col = next((c for c in ("TPM", "nTPM") if c in columns), None)
+        if value_col is None:
+            raise ValueError(f"{path}: tidy layout (has a Tissue column) but no TPM/nTPM "
+                             f"column (got {list(df.columns)})")
+        df = df.rename(columns={"Gene": "gene_id", "Gene name": "gene_name",
+                                 "Tissue": "tissue", value_col: "tpm"})
+        df = df[["gene_id", "gene_name", "tissue", "tpm"]]
+    else:
+        # Wide layout: tissues are extra columns to melt.
+        df = df.melt(id_vars=["Gene", "Gene name"], var_name="tissue", value_name="tpm")
+        df = df.rename(columns={"Gene": "gene_id", "Gene name": "gene_name"})
+    # Everything was read as text; non-numeric cells become NaN and are discarded.
+    df["tpm"] = pd.to_numeric(df["tpm"], errors="coerce")
+    df = df.dropna(subset=["tpm"]).reset_index(drop=True)
+    return HPARnaData(df)
+
+
+def hpa_gene_scores(
+    hpa: HPAData,
+    tissue: str,
+    celltype: str | None = None,
+    *,
+    level_scores: Mapping[str, float] | None = None,
+    multiple_celltype: str = "best",
+) -> dict[str, float]:
+    """Turn HPA categorical levels into numeric gene scores for one ``tissue``.
+
+    Rows are restricted to ``tissue`` and, when ``celltype`` is given, to that cell type.
+    Each row's ``level`` is looked up in ``level_scores`` (default
+    :data:`HPA_LEVEL_SCORES`); rows whose level is not in the table are discarded, so a
+    gene with no scorable row is simply missing from the result (downstream
+    :func:`score_reactions_from_genes` is expected to apply its ``no_gene_score``
+    fallback to reactions whose genes are all absent).
+
+    A gene listed for several cell types is collapsed according to
+    ``multiple_celltype``: ``"best"`` takes the maximum score (RAVEN default),
+    ``"average"`` the mean.
+
+    Returns ``{gene_id: score}``; an empty dict when no row survives the filters.
+    Raises ``ValueError`` for any other ``multiple_celltype`` value.
+    """
+    aggregators = {"best": "max", "average": "mean"}
+    if multiple_celltype not in tuple(aggregators):
+        raise ValueError(f"multiple_celltype must be 'best' or 'average'; got {multiple_celltype!r}")
+    table = HPA_LEVEL_SCORES if level_scores is None else dict(level_scores)
+
+    wanted = hpa.df["tissue"] == tissue
+    if celltype is not None:
+        wanted = wanted & (hpa.df["celltype"] == celltype)
+    # ``assign`` builds a new frame, so the caller's DataFrame is never modified.
+    scored = hpa.df.loc[wanted].assign(score=lambda frame: frame["level"].map(table))
+    scored = scored.dropna(subset=["score"])  # unknown HPA levels drop out (omitted, not -inf)
+    if scored.empty:
+        return {}
+    how = aggregators[multiple_celltype]
+    return scored.groupby("gene_id")["score"].agg(how).to_dict()
+
+
+def rna_gene_scores(
+    rna: HPARnaData,
+    tissue: str,
+    *,
+    reference: Mapping[str, float] | float | None = None,
+    factor: float = 5.0,
+    max_score: float = 10.0,
+    min_score: float = -5.0,
+) -> dict[str, float]:
+    """Score genes from HPA RNA-seq TPM values for one ``tissue``.
+
+    This is a thin wrapper: it picks the tissue's ``{gene_id: TPM}`` mapping and hands
+    it, together with ``reference``, ``factor``, ``max_score`` and ``min_score``, to
+    :func:`raven_python.init.score.gene_scores_from_expression`, which performs the
+    actual scoring. When ``reference`` is ``None`` the per-gene mean TPM over all
+    tissues in the dataset is used instead.
+
+    Returns whatever ``gene_scores_from_expression`` returns (annotated as
+    ``{gene_id: score}``). Raises ``ValueError`` when ``tissue`` is not in the dataset.
+    """
+    known_tissues = set(rna.df["tissue"])
+    if tissue not in known_tissues:
+        raise ValueError(f"tissue {tissue!r} not in dataset (tissues: {rna.tissues()})")
+    if reference is None:
+        # Default reference: mean TPM of each gene across every tissue.
+        per_gene_mean = rna.df.groupby("gene_id")["tpm"].mean()
+        reference = per_gene_mean.to_dict()
+    expression = rna.expression(tissue)
+    return gene_scores_from_expression(
+        expression,
+        reference,
+        factor=factor,
+        max_score=max_score,
+        min_score=min_score,
+    )
+
+
+def _check_headers(df: pd.DataFrame, expected: tuple[str, ...], path: str | Path) -> None:
+    """Raise ``ValueError`` naming every header of ``expected`` that ``df`` lacks.
+
+    ``path`` is only used to prefix the error message. Returns ``None`` when all
+    expected headers are present.
+    """
+    present = df.columns
+    absent = [header for header in expected if header not in present]
+    if not absent:
+        return
+    raise ValueError(f"{path}: missing HPA columns {absent} (got {list(df.columns)})")
diff --git a/src/raven_python/reconstruction/__init__.py b/src/raven_python/reconstruction/__init__.py
new file mode 100644
index 0000000..a270e2c
--- /dev/null
+++ b/src/raven_python/reconstruction/__init__.py
@@ -0,0 +1 @@
+"""De novo reconstruction from KEGG and protein homology (BLAST/DIAMOND)."""
diff --git a/src/raven_python/reconstruction/homology/__init__.py b/src/raven_python/reconstruction/homology/__init__.py
new file mode 100644
index 0000000..6ed9748
--- /dev/null
+++ b/src/raven_python/reconstruction/homology/__init__.py
@@ -0,0 +1,19 @@
+"""Homology-based reconstruction from template models (getModelFromHomology, BLAST/DIAMOND)."""
+from raven_python.reconstruction.homology.blast import (
+    blast_from_table,
+    run_blast,
+    run_diamond,
+)
+from raven_python.reconstruction.homology.hits import HIT_COLUMNS, make_ortholog_hits, validate_hits
+from raven_python.reconstruction.homology.homology import HomologyResult, get_model_from_homology
+
+__all__ = [
+    "HIT_COLUMNS",
+    "HomologyResult",
+    "blast_from_table",
+    "get_model_from_homology",
+    "make_ortholog_hits",
+    "run_blast",
+    "run_diamond",
+    "validate_hits",
+]
diff --git a/src/raven_python/reconstruction/homology/blast.py b/src/raven_python/reconstruction/homology/blast.py
new file mode 100644
index 0000000..246ddab
--- /dev/null
+++ b/src/raven_python/reconstruction/homology/blast.py
@@ -0,0 +1,146 @@
+"""Run BLAST+ / DIAMOND (or load precomputed hits) into a homology hits table.
+
+Each producer returns the bidirectional hits DataFrame (``HIT_COLUMNS``) consumed by
+:func:`~raven_python.reconstruction.homology.get_model_from_homology`. Binaries are
+located via :func:`raven_python.binaries.resolve_binary` (arg → env → PATH → bundled).
+"""
+from __future__ import annotations
+
+import io
+import subprocess
+import tempfile
+from collections.abc import Sequence
+from pathlib import Path
+
+import pandas as pd
+
+from raven_python.binaries import resolve_binary
+from raven_python.reconstruction.homology.hits import HIT_COLUMNS, validate_hits
+
+# Tabular output columns requested from BLAST+/DIAMOND, in order.
+# The same list names the columns when the tool output is parsed in ``_parse_tabular``.
+_OUTFMT_FIELDS = ["qseqid", "sseqid", "evalue", "pident", "length", "bitscore", "ppos"]
+# BLAST/DIAMOND field name -> hits-table column name (query = "from", subject = "to").
+_FIELD_TO_HIT = {
+    "qseqid": "from_gene", "sseqid": "to_gene", "evalue": "evalue",
+    "pident": "identity", "length": "align_len", "bitscore": "bitscore", "ppos": "ppos",
+}
+
+
+def _parse_tabular(text: str, from_id: str, to_id: str, sep: str) -> pd.DataFrame:
+    """Convert one BLAST/DIAMOND tabular output into hit rows for a single direction.
+
+    ``text`` is the raw tool output with ``_OUTFMT_FIELDS`` columns separated by ``sep``.
+    Every row is tagged with ``from_id`` / ``to_id`` and the columns are returned in
+    ``HIT_COLUMNS`` order. Blank output yields an empty frame with those columns.
+    """
+    if text.strip() == "":
+        return pd.DataFrame(columns=HIT_COLUMNS)
+    # The first two columns (query / subject ids) are read as text.
+    raw = pd.read_csv(io.StringIO(text), sep=sep, names=_OUTFMT_FIELDS, dtype={0: str, 1: str})
+    tagged = raw.rename(columns=_FIELD_TO_HIT).assign(from_id=from_id, to_id=to_id)
+    return tagged[HIT_COLUMNS]
+
+
+def _as_list(x):
+    """Wrap a lone ``str``/``Path`` in a list; turn any other iterable into a list."""
+    if isinstance(x, str) or isinstance(x, Path):
+        return [x]
+    return list(x)
+
+
+def _run(cmd: list[str]) -> str:
+    """Run ``cmd`` and return its captured standard output as text.
+
+    Raises ``RuntimeError`` carrying the stripped standard error when the process
+    exits with a non-zero status.
+    """
+    completed = subprocess.run(cmd, capture_output=True, text=True)
+    if completed.returncode == 0:
+        return completed.stdout
+    raise RuntimeError(f"{cmd[0]} failed:\n{completed.stderr.strip()}")
+
+
+def run_blast(
+    organism_id: str,
+    fasta: str | Path,
+    model_ids: str | Sequence[str],
+    ref_fastas: str | Path | Sequence[str | Path],
+    *,
+    evalue: float = 1e-5,
+    threads: int = 1,
+    blastp: str | Path | None = None,
+    makeblastdb: str | Path | None = None,
+) -> pd.DataFrame:
+    """Bidirectional BLAST+ between an organism and template organisms.
+
+    For every ``(model_id, ref_fasta)`` pair two searches are run: template proteins
+    against the organism (rows tagged ``from_id=model_id``, ``to_id=organism_id``) and
+    organism proteins against the template (the reverse tagging). Requires BLAST+
+    (`blastp`, `makeblastdb`).
+
+    Parameters
+    ----------
+    organism_id
+        Id recorded for the organism whose proteins are in ``fasta``.
+    fasta
+        Protein FASTA of that organism.
+    model_ids, ref_fastas
+        Template ids and their protein FASTA files, paired by position. A single
+        id / path may be passed instead of a sequence.
+    evalue
+        Passed to ``blastp -evalue``; the returned hits are filtered at this value.
+    threads
+        Passed to ``blastp -num_threads``.
+    blastp, makeblastdb
+        Optional explicit binaries, forwarded to ``resolve_binary``.
+
+    Returns
+    -------
+    pandas.DataFrame
+        All hits of both directions with the ``HIT_COLUMNS`` columns; empty when no
+        template is given.
+
+    Raises
+    ------
+    ValueError
+        If ``model_ids`` and ``ref_fastas`` differ in length.
+    RuntimeError
+        If ``makeblastdb`` or ``blastp`` exits with a non-zero status.
+    """
+    # Wrap a lone id the same way as a lone FASTA path: ``list("sce")`` would split the
+    # id into single characters and then fail the length check below.
+    model_ids = _as_list(model_ids)
+    ref_fastas = _as_list(ref_fastas)
+    if len(model_ids) != len(ref_fastas):
+        raise ValueError("model_ids and ref_fastas must have the same length.")
+    blastp = resolve_binary("blastp", binary=blastp)
+    makeblastdb = resolve_binary("makeblastdb", binary=makeblastdb)
+    outfmt = "10 " + " ".join(_OUTFMT_FIELDS)  # 10 = CSV
+
+    frames = []
+    with tempfile.TemporaryDirectory() as tmp:
+        tmp = Path(tmp)
+
+        def blastp_dir(query, subject_fasta, from_id, to_id):
+            # Number the scratch databases instead of embedding the ids: ids may contain
+            # path separators or spaces. ``len(frames)`` is unique per call because a
+            # frame is appended only after each call returns.
+            db = tmp / f"db_{len(frames)}"
+            _run([makeblastdb, "-in", str(subject_fasta), "-dbtype", "prot", "-out", str(db)])
+            out = _run([
+                blastp, "-query", str(query), "-db", str(db), "-evalue", str(evalue),
+                "-outfmt", outfmt, "-num_threads", str(threads),
+            ])
+            return _parse_tabular(out, from_id, to_id, sep=",")
+
+        for model_id, ref in zip(model_ids, ref_fastas, strict=True):
+            # template -> organism, and organism -> template
+            frames.append(blastp_dir(ref, fasta, model_id, organism_id))
+            frames.append(blastp_dir(fasta, ref, organism_id, model_id))
+    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame(columns=HIT_COLUMNS)
+
+
+def run_diamond(
+    organism_id: str,
+    fasta: str | Path,
+    model_ids: str | Sequence[str],
+    ref_fastas: str | Path | Sequence[str | Path],
+    *,
+    evalue: float = 1e-5,
+    threads: int = 1,
+    sensitivity: str = "--more-sensitive",
+    diamond: str | Path | None = None,
+) -> pd.DataFrame:
+    """Bidirectional DIAMOND between an organism and template organisms.
+
+    For every ``(model_id, ref_fasta)`` pair two searches are run: template proteins
+    against the organism (rows tagged ``from_id=model_id``, ``to_id=organism_id``) and
+    organism proteins against the template (the reverse tagging). Requires DIAMOND.
+
+    Parameters
+    ----------
+    organism_id
+        Id recorded for the organism whose proteins are in ``fasta``.
+    fasta
+        Protein FASTA of that organism.
+    model_ids, ref_fastas
+        Template ids and their protein FASTA files, paired by position. A single
+        id / path may be passed instead of a sequence.
+    evalue
+        Passed to ``diamond blastp --evalue``.
+    threads
+        Passed to ``diamond blastp --threads``.
+    sensitivity
+        Extra command-line flag appended verbatim; an empty string adds nothing.
+    diamond
+        Optional explicit binary, forwarded to ``resolve_binary``.
+
+    Returns
+    -------
+    pandas.DataFrame
+        All hits of both directions with the ``HIT_COLUMNS`` columns; empty when no
+        template is given.
+
+    Raises
+    ------
+    ValueError
+        If ``model_ids`` and ``ref_fastas`` differ in length.
+    RuntimeError
+        If a DIAMOND invocation exits with a non-zero status.
+    """
+    # Wrap a lone id the same way as a lone FASTA path: ``list("sce")`` would split the
+    # id into single characters and then fail the length check below.
+    model_ids = _as_list(model_ids)
+    ref_fastas = _as_list(ref_fastas)
+    if len(model_ids) != len(ref_fastas):
+        raise ValueError("model_ids and ref_fastas must have the same length.")
+    diamond = resolve_binary("diamond", binary=diamond)
+
+    frames = []
+    with tempfile.TemporaryDirectory() as tmp:
+        tmp = Path(tmp)
+
+        def diamond_dir(query, subject_fasta, from_id, to_id):
+            # Number the scratch databases instead of embedding the ids: ids may contain
+            # path separators or spaces. ``len(frames)`` is unique per call because a
+            # frame is appended only after each call returns.
+            db = tmp / f"db_{len(frames)}"
+            _run([diamond, "makedb", "--in", str(subject_fasta), "--db", str(db)])
+            cmd = [diamond, "blastp", "--query", str(query), "--db", str(db),
+                   "--evalue", str(evalue), "--outfmt", "6", *_OUTFMT_FIELDS,
+                   "--threads", str(threads)]
+            if sensitivity:
+                cmd.append(sensitivity)
+            return _parse_tabular(_run(cmd), from_id, to_id, sep="\t")
+
+        for model_id, ref in zip(model_ids, ref_fastas, strict=True):
+            # template -> organism, and organism -> template
+            frames.append(diamond_dir(ref, fasta, model_id, organism_id))
+            frames.append(diamond_dir(fasta, ref, organism_id, model_id))
+    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame(columns=HIT_COLUMNS)
+
+
+def blast_from_table(source: str | Path | pd.DataFrame) -> pd.DataFrame:
+    """Load a precomputed homology hits table (CSV path or DataFrame).
+
+    The input is a plain CSV file or an in-memory DataFrame (Excel workbooks are not
+    read). It must contain every ``HIT_COLUMNS`` column; any other column is discarded.
+
+    Returns a new DataFrame with exactly the ``HIT_COLUMNS`` columns, in that order.
+    A DataFrame passed in is never modified. Raises ``ValueError`` (from
+    ``validate_hits``) when a required column is missing.
+    """
+    from_frame = isinstance(source, pd.DataFrame)
+    # Force gene-id columns to str: an all-numeric gene-id column (e.g. Entrez ids)
+    # would otherwise be read as int64 and never match the string gene ids in a model.
+    df = (source if from_frame
+          else pd.read_csv(source, dtype={"from_gene": str, "to_gene": str}))
+    validate_hits(df)
+    hits = df[HIT_COLUMNS].copy()
+    if from_frame:
+        # A caller-built frame can carry numeric gene ids just like a CSV can; apply the
+        # same coercion here. Missing values are left as-is rather than becoming "nan".
+        for col in ("from_gene", "to_gene"):
+            hits[col] = hits[col].map(lambda v: v if pd.isna(v) else str(v))
+    return hits
diff --git a/src/raven_python/reconstruction/homology/hits.py b/src/raven_python/reconstruction/homology/hits.py
new file mode 100644
index 0000000..2f706c3
--- /dev/null
+++ b/src/raven_python/reconstruction/homology/hits.py
@@ -0,0 +1,64 @@
+"""Homology hits table — the data structure shared across the homology track.
+
+The hits are one tidy ``pandas.DataFrame`` of bidirectional hits, one row per hit.
+This is the currency between the BLAST / DIAMOND wrappers and
+:func:`get_model_from_homology`.
+
+Columns (``HIT_COLUMNS``):
+``from_id, to_id`` (organism/model ids), ``from_gene, to_gene`` (the matched
+genes; ``from_gene`` is in ``from_id``), and the hit metrics
+``evalue, identity, align_len, bitscore, ppos``.
+"""
+from __future__ import annotations
+
+from collections.abc import Iterable
+
+import pandas as pd
+
+# Canonical columns (and column order) of a hits table; ``validate_hits`` requires all
+# of them and ``make_ortholog_hits`` emits its rows in this order.
+HIT_COLUMNS = [
+    "from_id", "to_id", "from_gene", "to_gene",
+    "evalue", "identity", "align_len", "bitscore", "ppos",
+]
+
+
+def make_ortholog_hits(
+    ortholog_pairs: Iterable[tuple[str, str]],
+    source_model_id: str,
+    target_id: str,
+) -> pd.DataFrame:
+    """Turn a predefined ortholog list into a bidirectional hits table.
+
+    Every ``(source_gene, target_gene)`` pair produces two rows, source -> target and
+    target -> source, both carrying sentinel metrics (evalue 0, identity 100,
+    align_len 1000, bitscore 1000, ppos 100) chosen so the pair passes any reasonable
+    filter. This lets a known ortholog mapping feed :func:`get_model_from_homology`
+    without running BLAST, and doubles as the testing entry point.
+
+    Parameters
+    ----------
+    ortholog_pairs
+        Iterable of ``(source_gene, target_gene)``; source is the template/model
+        organism, target the organism being built. Gene ids are converted to ``str``.
+    source_model_id
+        ID of the template model the source genes belong to.
+    target_id
+        ID of the organism to build a model for (``model_for``).
+
+    Raises
+    ------
+    ValueError
+        If ``ortholog_pairs`` yields no pair.
+    """
+    pairs = [(str(src), str(tgt)) for src, tgt in ortholog_pairs]
+    if len(pairs) == 0:
+        raise ValueError("ortholog_pairs is empty.")
+
+    # evalue, identity, align_len, bitscore, ppos shared by every emitted row.
+    sentinel = (0.0, 100.0, 1000, 1000.0, 100.0)
+    rows = []
+    for src_gene, tgt_gene in pairs:
+        rows.extend((
+            (source_model_id, target_id, src_gene, tgt_gene, *sentinel),
+            (target_id, source_model_id, tgt_gene, src_gene, *sentinel),
+        ))
+    return pd.DataFrame(rows, columns=HIT_COLUMNS)
+
+
+def validate_hits(hits: pd.DataFrame) -> pd.DataFrame:
+    """Ensure ``hits`` has every ``HIT_COLUMNS`` column and hand it back unchanged.
+
+    Raises ``ValueError`` listing the missing columns otherwise.
+    """
+    absent = [column for column in HIT_COLUMNS if column not in hits.columns]
+    if not absent:
+        return hits
+    raise ValueError(f"hits is missing required columns: {absent}")
diff --git a/src/raven_python/reconstruction/homology/homology.py b/src/raven_python/reconstruction/homology/homology.py
new file mode 100644
index 0000000..bc6fa41
--- /dev/null
+++ b/src/raven_python/reconstruction/homology/homology.py
@@ -0,0 +1,281 @@
+"""Build a draft model from template models + homology hits.
+
+Key behaviour:
+
+* clear ``bidirectional`` / ``best_hits_only`` parameters control the hit-filtering
+  strictness (cleaner than a single overloaded "strictness" knob);
+* GPR rewriting works on cobra's AST, not regex;
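+* explicit ``complex_policy`` decides what happens to AND-subunits that lack an
+  ortholog: ``flag`` them with an ``OLD_<model>_<gene>`` placeholder, ``keep`` the
+  complex without them, or ``drop`` the incomplete complex;
+* best-hit selection is bitscore-based by default (see the ``score`` parameter);
+* the ortholog map is returned as a nested dict (``HomologyResult.gene_map``);
+  provenance is recorded in each transferred reaction's ``notes``.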
+"""
+from __future__ import annotations
+
+import ast
+import warnings
+from dataclasses import dataclass, field
+
+import cobra
+import pandas as pd
+
+from raven_python.manipulation.merge import merge_models
+from raven_python.reconstruction.homology.hits import validate_hits
+
+
+@dataclass
+class HomologyResult:
+    """What :func:`get_model_from_homology` hands back.
+
+    Attributes
+    ----------
+    model
+        The draft ``cobra.Model`` built for the new organism.
+    gene_map
+        The ortholog mapping that was applied, shaped as
+        ``{model_id: {template_gene: [new_gene, ...]}}``. Defaults to a fresh empty
+        dict for each instance.
+    """
+
+    model: cobra.Model
+    gene_map: dict = field(default_factory=dict)
+
+
+class _Unmapped:
+    """Marker for a GPR leaf gene that has no ortholog in the new organism.
+
+    ``gene`` keeps the template gene id so the enclosing AND/OR node can decide
+    whether to flag, omit or reject it.
+    """
+
+    __slots__ = ("gene",)
+
+    def __init__(self, gene: str) -> None:
+        self.gene = gene
+
+
+def _rewrite_node(node, ortho: dict, policy: str, model_id: str):
+    """Rewrite one GPR AST node, replacing template genes with their orthologs.
+
+    ``ortho`` maps a template gene id to the list of new-organism genes; ``policy`` is
+    the complex policy (``"flag"``, ``"keep"`` or ``"drop"``) and ``model_id`` is only
+    used to build ``OLD_<model_id>_<gene>`` placeholders.
+
+    Returns one of:
+
+    * a GPR sub-expression string;
+    * ``None`` when nothing of the node survives (also for unsupported node types);
+    * an ``_Unmapped`` for a bare leaf without ortholog, so the parent can decide.
+
+    NOTE(review): under ``"flag"`` an AND node whose subunits are *all* unmapped is
+    still returned as a placeholder-only complex, and such a string survives inside
+    an enclosing OR — confirm that this is intended.
+    """
+    if isinstance(node, ast.Name):
+        mapped = ortho.get(node.id)
+        if not mapped:
+            return _Unmapped(node.id)
+        if len(mapped) == 1:
+            return mapped[0]
+        # Several orthologs of one template gene are interchangeable: OR them.
+        return "(" + " or ".join(mapped) + ")"
+
+    if not isinstance(node, ast.BoolOp):
+        return None
+
+    rewritten = [_rewrite_node(child, ortho, policy, model_id) for child in node.values]
+
+    if isinstance(node.op, ast.Or):
+        # An isozyme branch with no ortholog is simply absent.
+        joiner = " or "
+        surviving = [item for item in rewritten if isinstance(item, str)]
+    else:
+        # And: apply the complex policy to subunits that did not map.
+        joiner = " and "
+        surviving = []
+        for item in rewritten:
+            if isinstance(item, str):
+                surviving.append(item)
+                continue
+            # ``item`` is an unmapped leaf or a dead sub-branch (None).
+            if policy == "drop":
+                return None  # incomplete complex -> reaction unsupported
+            if isinstance(item, _Unmapped) and policy == "flag":
+                surviving.append(f"OLD_{model_id}_{item.gene}")
+            # otherwise ("keep", or a dead sub-branch): leave the subunit out
+
+    if not surviving:
+        return None
+    if len(surviving) == 1:
+        return surviving[0]
+    return "(" + joiner.join(surviving) + ")"
+
+
+def _rewrite_gpr(rxn, ortho: dict, policy: str, model_id: str):
+    """Rewrite ``rxn``'s GPR for the new organism.
+
+    Returns the rewritten rule as a string, or ``None`` when the reaction cannot be
+    transferred: it has no gene rule, none of its genes has an ortholog in ``ortho``,
+    or the rewrite of its GPR tree does not produce an expression.
+    """
+    if not rxn.gene_reaction_rule:
+        return None
+    # A reaction is only transferred if at least one of its genes has an ortholog.
+    if all(gene.id not in ortho for gene in rxn.genes):
+        return None
+    rewritten = _rewrite_node(rxn.gpr.body, ortho, policy, model_id)
+    return rewritten if isinstance(rewritten, str) else None
+
+
+def _strictness_to_params(strictness, bidirectional, best_hits_only, complex_policy, map_direction):
+    """Translate RAVEN's legacy ``strictness`` (1/2/3) into the explicit parameters.
+
+    Returns ``(bidirectional, best_hits_only, complex_policy, map_direction)``.
+    With ``strictness=None`` the arguments are passed through untouched; otherwise
+    only the first two are overridden:
+
+    * 1 -> bidirectional, all hits;
+    * 2 -> one direction, all hits;
+    * 3 -> bidirectional, best hits only.
+
+    Raises ``ValueError`` for any other value.
+    """
+    if strictness is None:
+        return bidirectional, best_hits_only, complex_policy, map_direction
+    # (level, bidirectional, best_hits_only)
+    presets = ((1, True, False), (2, False, False), (3, True, True))
+    for level, bidir, best_only in presets:
+        if strictness == level:
+            return bidir, best_only, complex_policy, map_direction
+    raise ValueError(f"strictness must be 1, 2 or 3, got {strictness}")
+
+
+def _ortholog_map(
+    hits, model_for, model_ids, *, bidirectional, best_hits_only, score, map_direction,
+    model_genes, max_evalue, min_align_len, min_identity,
+):
+    """Build ``{model_id: {template_gene: [new_gene, ...]}}`` from the hits table.
+
+    Steps, in order:
+
+    1. keep hits with ``evalue <= max_evalue``, ``align_len >= min_align_len`` and
+       ``identity >= min_identity``;
+    2. with ``best_hits_only``, keep per ``(from_id, to_id, from_gene)`` only the top
+       hit by ``score`` (lowest wins for ``"evalue"``, highest for anything else);
+    3. split into hits *from* ``model_for`` (new -> template) and hits *to*
+       ``model_for`` (template -> new), restricted to templates in ``model_ids``;
+    4. ``bidirectional`` keeps pairs found in both directions; otherwise
+       ``map_direction`` picks the new -> template (``"new_to_old"``) or the
+       template -> new view;
+    5. discard template genes that are not in ``model_genes[model_id]``.
+
+    Each list of new genes is sorted. Returns ``{}`` when no pair is left after step 4.
+    """
+    passes = (
+        (hits.evalue <= max_evalue)
+        & (hits.align_len >= min_align_len)
+        & (hits.identity >= min_identity)
+    )
+    kept = hits[passes]
+
+    if best_hits_only:
+        lower_is_better = score == "evalue"
+        kept = kept.sort_values(score, ascending=lower_is_better)
+        kept = kept.groupby(["from_id", "to_id", "from_gene"], sort=False).head(1)
+
+    # Directional views, normalised to (model_id, new_gene, template_gene).
+    from_new = kept[kept.from_id == model_for]
+    forward = from_new[["to_id", "from_gene", "to_gene"]].rename(
+        columns={"to_id": "model_id", "from_gene": "new_gene", "to_gene": "template_gene"}
+    )
+    to_new = kept[kept.to_id == model_for]
+    reverse = to_new[["from_id", "from_gene", "to_gene"]].rename(
+        columns={"from_id": "model_id", "from_gene": "template_gene", "to_gene": "new_gene"}
+    )
+    forward = forward[forward.model_id.isin(model_ids)]
+    reverse = reverse[reverse.model_id.isin(model_ids)]
+
+    key = ["model_id", "new_gene", "template_gene"]
+    if bidirectional:
+        pairs = forward.merge(reverse, on=key, how="inner")
+    elif map_direction == "new_to_old":
+        pairs = forward
+    else:
+        pairs = reverse
+    pairs = pairs[key].drop_duplicates()
+    if pairs.empty:
+        return {}
+
+    # Keep only template genes that actually exist in their model.
+    in_model = [
+        template_gene in model_genes.get(model_id, ())
+        for model_id, template_gene in zip(pairs.model_id, pairs.template_gene, strict=True)
+    ]
+    pairs = pairs[in_model]
+
+    mapping: dict = {}
+    for model_id, template_gene, new_gene in zip(pairs.model_id, pairs.template_gene, pairs.new_gene, strict=True):
+        bucket = mapping.setdefault(model_id, {}).setdefault(template_gene, [])
+        if new_gene not in bucket:
+            bucket.append(new_gene)
+    for per_model in mapping.values():
+        for bucket in per_model.values():
+            bucket.sort()
+    return mapping
+
+
+def _apply_preferred_order(ortho: dict, order: list[str]) -> dict:
+    """Let each new gene be claimed by the first template (in ``order``) that maps it.
+
+    ``ortho`` is ``{model_id: {template_gene: [new_gene, ...]}}``. The result has the
+    same shape and the same top-level keys, but a new gene stays only under the
+    template that claimed it; template genes left without any new gene are removed.
+    New genes mapped solely by templates missing from ``order`` are claimed by nobody
+    and therefore disappear.
+    """
+    claimed_by: dict = {}  # new_gene -> model_id that claimed it first
+    for model_id in order:
+        for new_genes in ortho.get(model_id, {}).values():
+            for gene in new_genes:
+                if gene not in claimed_by:
+                    claimed_by[gene] = model_id
+
+    pruned: dict = {}
+    for model_id, per_model in ortho.items():
+        pruned[model_id] = {
+            template_gene: kept
+            for template_gene, new_genes in per_model.items()
+            if (kept := [gene for gene in new_genes if claimed_by.get(gene) == model_id])
+        }
+    return pruned
+
+
+def get_model_from_homology(
+    models,
+    hits: pd.DataFrame,
+    model_for: str,
+    *,
+    preferred_order=None,
+    bidirectional: bool = True,
+    best_hits_only: bool = False,
+    map_direction: str = "new_to_old",
+    score: str = "bitscore",
+    complex_policy: str = "flag",
+    only_genes_in_models: bool = False,
+    max_evalue: float = 1e-30,
+    min_align_len: int = 200,
+    min_identity: float = 40,
+    strictness: int | None = None,
+) -> HomologyResult:
+    """Build a draft model for ``model_for`` by transferring reactions from templates.
+
+    Parameters
+    ----------
+    models
+        One ``cobra.Model`` or a collection of them; each model's ``id`` is the
+        template id matched against ``from_id`` / ``to_id`` in ``hits``.
+    hits
+        Hits table with the ``HIT_COLUMNS`` columns.
+    model_for
+        Id of the organism being built, as used in ``hits``; becomes the draft's id.
+    preferred_order
+        Template ids in order of preference. When given, only templates listed here
+        are transferred, in this order; with more than one template each new gene is
+        additionally assigned to the first listed template that maps it.
+    bidirectional
+        Require a hit in both directions for a gene pair.
+    best_hits_only
+        Keep only the top hit (by ``score``) per query gene and direction.
+    map_direction
+        ``"new_to_old"`` or ``"old_to_new"``; the hit direction used when
+        ``bidirectional`` is false.
+    score
+        Hits column that ranks hits for ``best_hits_only``.
+    complex_policy
+        ``"flag"``, ``"keep"`` or ``"drop"``: how AND-subunits without an ortholog
+        are treated when GPRs are rewritten.
+    only_genes_in_models
+        Discard hits in which neither gene occurs in any template model.
+    max_evalue, min_align_len, min_identity
+        Thresholds every hit must meet.
+    strictness
+        Legacy RAVEN alias (1/2/3) that overrides ``bidirectional`` and
+        ``best_hits_only``.
+
+    Returns
+    -------
+    HomologyResult
+        The draft model and the ortholog map that was applied. The model is empty
+        when no reaction could be transferred.
+
+    Raises
+    ------
+    ValueError
+        For an unknown ``complex_policy``, ``map_direction`` or ``strictness``, or
+        when ``hits`` lacks a required column.
+
+    Warns
+    -----
+    UserWarning
+        When fewer than 5% of a template's genes occur in ``hits``.
+    """
+    if isinstance(models, cobra.Model):
+        models = [models]
+    if complex_policy not in ("flag", "keep", "drop"):
+        raise ValueError(f"complex_policy must be flag/keep/drop, got {complex_policy!r}")
+    if map_direction not in ("new_to_old", "old_to_new"):
+        raise ValueError(f"map_direction must be new_to_old/old_to_new, got {map_direction!r}")
+    bidirectional, best_hits_only, complex_policy, map_direction = _strictness_to_params(
+        strictness, bidirectional, best_hits_only, complex_policy, map_direction
+    )
+    validate_hits(hits)
+
+    model_by_id = {m.id: m for m in models}
+    model_ids = list(model_by_id)
+    model_genes = {mid: {g.id for g in m.genes} for mid, m in model_by_id.items()}
+    all_model_genes = set().union(*model_genes.values()) if model_genes else set()
+
+    # Sanity: each template should overlap the hits by >=5% of its genes.
+    # The set of genes seen in the hits is the same for every template, so build it once.
+    hit_genes = set(hits.from_gene) | set(hits.to_gene)
+    for mid, genes in model_genes.items():
+        in_hits = genes & hit_genes
+        if genes and len(in_hits) < 0.05 * len(genes):
+            warnings.warn(
+                f"<5% of genes in template '{mid}' appear in the hits table; "
+                "check that the FASTA and model use the same gene identifiers.",
+                stacklevel=2,
+            )
+
+    if only_genes_in_models:
+        hits = hits[hits.from_gene.isin(all_model_genes) | hits.to_gene.isin(all_model_genes)]
+
+    ortho = _ortholog_map(
+        hits, model_for, model_ids, bidirectional=bidirectional, best_hits_only=best_hits_only,
+        score=score, map_direction=map_direction,
+        model_genes=model_genes, max_evalue=max_evalue, min_align_len=min_align_len,
+        min_identity=min_identity,
+    )
+
+    order = [str(x) for x in preferred_order] if preferred_order else model_ids
+    if preferred_order and len(models) > 1:
+        ortho = _apply_preferred_order(ortho, order)
+
+    # Build a per-template model holding only the transferred reactions with rewritten GPRs.
+    transferred = []
+    for mid in order:
+        model = model_by_id.get(mid)
+        if model is None:
+            continue  # an id in preferred_order that matches no template
+        per_model = ortho.get(mid, {})
+        m = model.copy()  # work on a copy so the caller's template is left untouched
+        keep: dict[str, str] = {}
+        for rxn in m.reactions:
+            new_gpr = _rewrite_gpr(rxn, per_model, complex_policy, mid)
+            if new_gpr is not None:
+                keep[rxn.id] = new_gpr
+        m.remove_reactions([r for r in m.reactions if r.id not in keep], remove_orphans=True)
+        for rid, gpr in keep.items():
+            r = m.reactions.get_by_id(rid)
+            r.gene_reaction_rule = gpr
+            # Replaces any notes inherited from the template with the provenance record.
+            r.notes = {"note": "Included by get_model_from_homology", "confidence_score": 2,
+                       "homology_source": mid}
+        if m.reactions:
+            transferred.append(m)
+
+    if transferred:
+        draft = merge_models(transferred, match_by="name")
+    else:
+        draft = cobra.Model()
+    draft.id = model_for
+    draft.name = "Generated by get_model_from_homology using " + ", ".join(model_ids)
+
+    # Remove genes that are no longer referenced by any reaction of the draft.
+    orphan_genes = [g for g in draft.genes if not g.reactions]
+    for g in orphan_genes:
+        draft.genes.remove(g)
+
+    return HomologyResult(model=draft, gene_map=ortho)
diff --git a/src/raven_python/reconstruction/kegg/__init__.py b/src/raven_python/reconstruction/kegg/__init__.py
new file mode 100644
index 0000000..5d27602
--- /dev/null
+++ b/src/raven_python/reconstruction/kegg/__init__.py
@@ -0,0 +1,77 @@
+"""KEGG-based draft reconstruction (getKEGGModelForOrganism and friends).
+
+Maintainer build steps: 3b.1 download (:mod:`.download`), 3b.2 dump parsing
+(:mod:`.parse`), 3b.3 HMM libraries (:mod:`.hmm`, :mod:`.taxonomy`). Runtime:
+3b.4 model for a KEGG species (:mod:`.organism`).
+"""
+from raven_python.reconstruction.kegg.download import (
+    download_kegg_dump,
+    extract_kegg_dump,
+    fetch_kegg_files,
+)
+from raven_python.reconstruction.kegg.hmm import (
+    build_hmm_library,
+    build_ko_fastas,
+    build_ko_hmm,
+)
+from raven_python.reconstruction.kegg.organism import (
+    get_kegg_model_for_organism,
+    get_kegg_model_for_organism_from_artefacts,
+)
+from raven_python.reconstruction.kegg.parse import (
+    KeggCompound,
+    KeggKO,
+    KeggReaction,
+    build_kegg_tables,
+    build_reference_model,
+    parse_kegg_compounds,
+    parse_kegg_dump,
+    parse_kegg_kos,
+    parse_kegg_reactions,
+    read_kegg_table,
+    stream_organism_gene_ko,
+    write_kegg_tables,
+)
+from raven_python.reconstruction.kegg.query import (
+    assign_kos,
+    get_kegg_model_from_sequences,
+    get_kegg_model_from_sequences_with_artefacts,
+    parse_hmmscan_tblout,
+    run_hmmscan,
+)
+from raven_python.reconstruction.kegg.taxonomy import (
+    organism_domains,
+    organisms_in_domain,
+    parse_taxonomy,
+)
+
+__all__ = [
+    "KeggCompound",
+    "KeggKO",
+    "KeggReaction",
+    "assign_kos",
+    "build_hmm_library",
+    "build_kegg_tables",
+    "build_ko_fastas",
+    "build_ko_hmm",
+    "build_reference_model",
+    "download_kegg_dump",
+    "extract_kegg_dump",
+    "fetch_kegg_files",
+    "get_kegg_model_for_organism",
+    "get_kegg_model_for_organism_from_artefacts",
+    "get_kegg_model_from_sequences",
+    "get_kegg_model_from_sequences_with_artefacts",
+    "organism_domains",
+    "organisms_in_domain",
+    "parse_hmmscan_tblout",
+    "parse_kegg_compounds",
+    "parse_kegg_dump",
+    "parse_kegg_kos",
+    "parse_kegg_reactions",
+    "parse_taxonomy",
+    "read_kegg_table",
+    "run_hmmscan",
+    "stream_organism_gene_ko",
+    "write_kegg_tables",
+]
diff --git a/src/raven_python/reconstruction/kegg/assemble.py b/src/raven_python/reconstruction/kegg/assemble.py
new file mode 100644
index 0000000..a2b5eb9
--- /dev/null
+++ b/src/raven_python/reconstruction/kegg/assemble.py
@@ -0,0 +1,82 @@
+"""Shared assembly of a draft model from a KO→genes mapping.
+
+Both KEGG runtime paths end the same way: having decided which genes belong to
+which KO — from organism annotations (3b.4) or from HMM hits (3b.5) — they map
+KO→reaction against the gene-free reference model, OR-join the genes into each
+reaction's GPR, keep gene-backed reactions (plus spontaneous ones when allowed),
+and apply the ``keep*`` quality filters. That common tail lives here.
+"""
+from __future__ import annotations
+
+import cobra
+import pandas as pd
+
+# Names of the two KEGG domains. Not used by the functions in this module; the
+# organism-path module imports it. NOTE(review): presumably to recognise
+# domain-mode ``organism_id`` values — confirm at the call site.
+_DOMAINS = {"eukaryotes", "prokaryotes"}
+
+
+def flag_set(rxn_flags: pd.DataFrame | None, column: str) -> set[str]:
+    """Collect the ``reaction`` ids whose ``column`` flag reads as true.
+
+    A cell counts as set when its string form, stripped and lower-cased, is
+    ``"true"`` or ``"1"`` — so real booleans and TSV text are both understood.
+    A missing table or a missing column yields an empty set.
+    """
+    if rxn_flags is not None and column in rxn_flags:
+        truthy = [str(cell).strip().lower() in ("true", "1") for cell in rxn_flags[column]]
+        return set(rxn_flags.loc[truthy, "reaction"])
+    return set()
+
+
+def assemble_model_from_ko_genes(
+    reference_model: cobra.Model,
+    ko_reaction: pd.DataFrame,
+    ko_to_genes: dict[str, list[str]],
+    *,
+    rxn_flags: pd.DataFrame | None = None,
+    keep_spontaneous: bool = True,
+    keep_undefined_stoich: bool = True,
+    keep_incomplete: bool = True,
+    keep_general: bool = False,
+    model_id: str | None = None,
+    model_name: str | None = None,
+    note: str | None = None,
+) -> tuple[cobra.Model, dict[str, list[str]]]:
+    """Turn a ``{ko: [gene, ...]}`` assignment into a draft model.
+
+    A reaction of ``reference_model`` survives when at least one of its KOs
+    (per ``ko_reaction``) has genes in ``ko_to_genes``, or when it is flagged
+    spontaneous and ``keep_spontaneous`` is on. A reaction flagged
+    ``undefined_stoich`` / ``incomplete`` / ``general`` in ``rxn_flags`` is
+    dropped whenever the matching ``keep_*`` switch is off, genes or not.
+
+    The reference model itself is left untouched; the result is a pruned copy
+    (optionally re-labelled with ``model_id`` / ``model_name``) whose kept
+    reactions carry an OR-joined GPR and, when ``note`` is given, that note.
+
+    Returns ``(model, gpr_map)``, ``gpr_map`` being ``{reaction id: sorted gene
+    list}`` for the gene-backed reactions, so callers can annotate genes later.
+    """
+    kos_by_reaction: dict[str, set[str]] = {}
+    for ko, rid in zip(ko_reaction["ko"], ko_reaction["reaction"], strict=True):
+        if rid not in kos_by_reaction:
+            kos_by_reaction[rid] = set()
+        kos_by_reaction[rid].add(ko)
+
+    spontaneous = flag_set(rxn_flags, "spontaneous")
+    # Union of every flagged set whose keep-switch is off: those reactions go.
+    excluded: set[str] = set()
+    for wanted, flagged in (
+        (keep_undefined_stoich, flag_set(rxn_flags, "undefined_stoich")),
+        (keep_incomplete, flag_set(rxn_flags, "incomplete")),
+        (keep_general, flag_set(rxn_flags, "general")),
+    ):
+        if not wanted:
+            excluded |= flagged
+
+    gpr_map: dict[str, list[str]] = {}
+    spontaneous_kept: set[str] = set()
+    for reaction in reference_model.reactions:
+        rid = reaction.id
+        if rid in excluded:
+            # Quality filters win even over reactions that would have genes.
+            continue
+        members: set[str] = set()
+        for ko in kos_by_reaction.get(rid, ()):
+            members.update(ko_to_genes.get(ko, ()))
+        if members:
+            gpr_map[rid] = sorted(members)
+        elif keep_spontaneous and rid in spontaneous:
+            spontaneous_kept.add(rid)
+
+    keep = set(gpr_map) | spontaneous_kept
+    model = reference_model.copy()
+    if model_id is not None:
+        model.id = model_id
+    if model_name is not None:
+        model.name = model_name
+
+    doomed = [rxn for rxn in model.reactions if rxn.id not in keep]
+    model.remove_reactions(doomed, remove_orphans=True)
+
+    for rid, genes in gpr_map.items():
+        model.reactions.get_by_id(rid).gene_reaction_rule = " or ".join(genes)
+    if note is not None:
+        for rid in keep:
+            model.reactions.get_by_id(rid).notes["note"] = note
+    return model, gpr_map
diff --git a/src/raven_python/reconstruction/kegg/download.py b/src/raven_python/reconstruction/kegg/download.py
new file mode 100644
index 0000000..8bb1826
--- /dev/null
+++ b/src/raven_python/reconstruction/kegg/download.py
@@ -0,0 +1,257 @@
+"""Download and arrange a local KEGG flat-file dump (step 3b.1).
+
+Maintainer-side, build-time tooling. Ports ``fetch_keggdb.sh`` — fetch the KEGG
+FTP source archives, extract them, and lift/concatenate the files that the
+parser (3b.2) and HMM build (3b.3) consume — but as **pure Python stdlib**
+(``urllib`` + ``tarfile`` + ``gzip`` + ``netrc``). That drops the script's
+dependence on ``wget``/``tar``/``gunzip`` (and Cygwin on Windows), so it runs
+unchanged on Linux, macOS and Windows. Credential hygiene is kept: a paid KEGG
+subscription's username/password are read from ``~/.netrc`` (mode 600), never
+passed on the command line.
+
+Requires an active KEGG FTP subscription. Add to ``~/.netrc``::
+
+    machine ftp.kegg.net login YOUR_USER password YOUR_PASS
+
+Typical use (run once per KEGG release)::
+
+    from raven_python.reconstruction.kegg import download_kegg_dump, parse_kegg_dump
+    download_kegg_dump("keggdb")            # -> keggdb/{reaction,compound,ko,...}
+    parse_kegg_dump("keggdb", "artefacts")  # -> reference model + gzipped TSVs
+
+The arranged dump contains: ``reaction``, ``reaction.lst``,
+``reaction_mapformula.lst``, ``compound`` (compound + glycan concatenated),
+``compound.inchi``, ``ko``, ``genes.pep`` (eukaryote + prokaryote proteomes
+concatenated), and ``taxonomy``.
+"""
+from __future__ import annotations
+
+import gzip
+import netrc
+import shutil
+import tarfile
+import urllib.request
+from pathlib import Path
+
+# Default ``machine`` name under which credentials are looked up in ``.netrc``.
+KEGG_HOST = "ftp.kegg.net"
+# Default root URL; the relative paths in ``DEFAULT_FILES`` are appended to it.
+BASE_URL = "https://ftp.kegg.net"
+
+# KEGG FTP paths fetched, mirroring fetch_keggdb.sh. Each is saved under its
+# basename, so the basenames must stay unique.
+DEFAULT_FILES: tuple[str, ...] = (
+    "kegg/ligand/reaction.tar.gz",
+    "kegg/ligand/compound.tar.gz",
+    "kegg/ligand/glycan.tar.gz",
+    "kegg/genes/ko.tar.gz",
+    "kegg/genes/fasta/eukaryotes.pep.gz",
+    "kegg/genes/fasta/prokaryotes.pep.gz",
+    "kegg/genes/misc/taxonomy",
+)
+
+
+# --------------------------------------------------------------------------- #
+# Credentials
+# --------------------------------------------------------------------------- #
+def _resolve_auth(
+    host: str,
+    *,
+    netrc_path: str | Path | None = None,
+    auth: tuple[str, str] | None = None,
+) -> tuple[str, str]:
+    """Work out the ``(user, password)`` pair to use for ``host``.
+
+    An explicit ``auth`` pair is returned untouched. Otherwise the entry for
+    ``host`` is read from ``netrc_path`` (default: ``~/.netrc``).
+
+    Raises:
+        FileNotFoundError: No ``auth`` given and the netrc file does not exist.
+        ValueError: The netrc file has no entry for ``host``, or the entry
+            lacks a login or a password.
+    """
+    if auth is not None:
+        return auth
+
+    if netrc_path:
+        path = Path(netrc_path)
+    else:
+        path = Path.home() / ".netrc"
+
+    # The same example line closes both "how to fix it" messages below.
+    example = f"    machine {host} login YOUR_USER password YOUR_PASS"
+    if not path.is_file():
+        raise FileNotFoundError(
+            f"No credentials given and {path} does not exist. Create it (chmod 600) "
+            f"with a line:\n{example}"
+        )
+
+    entry = netrc.netrc(str(path)).authenticators(host)
+    if not entry:
+        raise ValueError(f"No credentials for '{host}' in {path}. Add a line:\n{example}")
+
+    login, _account, password = entry
+    if login and password:
+        return login, password
+    raise ValueError(f"Incomplete credentials for '{host}' in {path}.")
+
+
+def _build_opener(base_url: str, user: str, password: str) -> urllib.request.OpenerDirector:
+    """Create a ``urllib`` opener that authenticates against ``base_url``.
+
+    The credentials are registered for every realm under ``base_url`` and
+    offered through both HTTP Basic and HTTP Digest handlers.
+    """
+    credentials = urllib.request.HTTPPasswordMgrWithDefaultRealm()
+    credentials.add_password(None, base_url, user, password)
+    handlers = [
+        urllib.request.HTTPBasicAuthHandler(credentials),
+        urllib.request.HTTPDigestAuthHandler(credentials),
+    ]
+    return urllib.request.build_opener(*handlers)
+
+
+# --------------------------------------------------------------------------- #
+# Fetch
+# --------------------------------------------------------------------------- #
+def fetch_kegg_files(
+    dest: str | Path,
+    *,
+    files: tuple[str, ...] = DEFAULT_FILES,
+    base_url: str = BASE_URL,
+    host: str = KEGG_HOST,
+    auth: tuple[str, str] | None = None,
+    netrc_path: str | Path | None = None,
+    force: bool = False,
+    verbose: bool = True,
+) -> list[Path]:
+    """Download the raw KEGG archives into ``dest`` (basenames). Returns the paths.
+
+    Existing files are skipped unless ``force=True`` (the script's ``wget -N``
+    intent, simplified to skip-if-present).
+
+    Because "already present" is the only freshness check, a download is first
+    streamed into a sibling ``<name>.part`` file and renamed onto its final name
+    only once the transfer has completed. An interrupted or failed transfer
+    therefore never leaves a truncated archive that a later run would skip.
+
+    Args:
+        dest: Target directory; created (with parents) when missing.
+        files: Paths relative to ``base_url``; each is saved under its basename.
+        base_url: Root URL the relative paths are appended to.
+        host: ``machine`` name used for the ``.netrc`` lookup.
+        auth: Explicit ``(user, password)``; bypasses ``.netrc`` when given.
+        netrc_path: Alternative netrc file (default ``~/.netrc``).
+        force: Re-download even when the target file already exists.
+        verbose: Print one progress line per file.
+
+    Returns:
+        The local path of every requested file, in the order of ``files``.
+
+    Raises:
+        FileNotFoundError, ValueError: Credentials could not be resolved (they
+            are resolved up front, even if every file ends up being skipped).
+        Exception: Whatever the opener or the file system raises during a
+            transfer is re-raised after the ``.part`` file has been removed.
+    """
+    user, password = _resolve_auth(host, netrc_path=netrc_path, auth=auth)
+    opener = _build_opener(base_url, user, password)
+    dest = Path(dest)
+    dest.mkdir(parents=True, exist_ok=True)
+
+    out: list[Path] = []
+    for path in files:
+        target = dest / Path(path).name
+        if target.exists() and not force:
+            if verbose:
+                print(f"  skip (exists): {target.name}")
+            out.append(target)
+            continue
+        url = f"{base_url.rstrip('/')}/{path.lstrip('/')}"
+        if verbose:
+            print(f"  fetching {path}")
+        # Stage next to the target (same directory, hence same file system) so
+        # the final rename replaces the target in a single step.
+        partial = target.with_name(target.name + ".part")
+        try:
+            with opener.open(url) as resp, open(partial, "wb") as handle:
+                shutil.copyfileobj(resp, handle)
+            partial.replace(target)
+        except BaseException:
+            # Includes Ctrl-C: drop the incomplete file, then propagate.
+            partial.unlink(missing_ok=True)
+            raise
+        out.append(target)
+    return out
+
+
+# --------------------------------------------------------------------------- #
+# Extract / arrange
+# --------------------------------------------------------------------------- #
+def _gunzip(src: Path, target: Path) -> None:
+    """Stream-decompress the gzip file ``src`` into ``target`` (overwriting it)."""
+    with gzip.open(src, "rb") as packed:
+        with open(target, "wb") as sink:
+            shutil.copyfileobj(packed, sink)
+
+
+def _concat(sources: list[Path], target: Path) -> None:
+    """Write the bytes of every file in ``sources``, in order, into ``target``.
+
+    ``target`` is truncated first; with no sources it ends up empty.
+    """
+    with open(target, "wb") as sink:
+        for part in sources:
+            with open(part, "rb") as stream:
+                shutil.copyfileobj(stream, sink)
+
+
+def extract_kegg_dump(dest: str | Path) -> dict[str, Path]:
+    """Extract and arrange the downloaded archives into the flat dump layout.
+
+    Mirrors ``fetch_keggdb.sh``'s extract step: untar the ``*.tar.gz`` archives,
+    gunzip the ``*.pep.gz`` proteomes, lift the needed files out of their
+    sub-directories, and concatenate compound+glycan and the two proteomes.
+    Tar extraction uses the ``data`` filter (no path traversal). Returns a
+    mapping of logical name -> path for the files produced.
+
+    Network-free, so this is the unit-tested core; ``download_kegg_dump`` chains
+    :func:`fetch_kegg_files` in front of it.
+
+    The step is destructive: each archive is deleted once unpacked, and the
+    extracted ``reaction``/``compound``/``glycan``/``ko`` directories are removed
+    after their files have been lifted out.
+
+    Args:
+        dest: Directory holding the downloaded archives; arranged in place.
+
+    Returns:
+        ``{logical name: path}`` for those of ``reaction``, ``reaction.lst``,
+        ``reaction_mapformula.lst``, ``compound``, ``compound.inchi``, ``ko``,
+        ``genes.pep`` and ``taxonomy`` that exist as files afterwards.
+
+    Raises:
+        FileNotFoundError: If no ``reaction``, ``compound`` or ``ko`` flat file
+            could be lifted out of the extracted archives.
+    """
+    dest = Path(dest)
+
+    # Unpack every tarball into ``dest`` and delete it, so that the ``*.gz``
+    # glob further down no longer sees any ``*.tar.gz``.
+    for tar_path in sorted(dest.glob("*.tar.gz")):
+        with tarfile.open(tar_path) as tar:
+            tar.extractall(dest, filter="data")
+        tar_path.unlink()
+
+    # ``with_suffix("")`` strips only the trailing ``.gz`` (x.pep.gz -> x.pep).
+    for gz_path in sorted(dest.glob("*.gz")):  # only the .pep.gz remain
+        _gunzip(gz_path, gz_path.with_suffix(""))
+        gz_path.unlink()
+
+    # Move ``dest/rel`` to ``dest/tmp`` when it exists; ``None`` signals absence.
+    def lift(rel: str, tmp: str) -> Path | None:
+        src = dest / rel
+        if src.is_file():
+            shutil.move(str(src), str(dest / tmp))
+            return dest / tmp
+        return None
+
+    # Files whose final name equals their directory's name (``reaction``,
+    # ``compound``, ``ko``) are parked under a temporary ``_name`` first: the
+    # final name is still taken by the extracted directory at this point.
+    reaction = lift("reaction/reaction", "_reaction")
+    lift("reaction/reaction.lst", "reaction.lst")
+    lift("reaction/reaction_mapformula.lst", "reaction_mapformula.lst")
+    compound = lift("compound/compound", "_compound")
+    lift("compound/compound.inchi", "compound.inchi")
+    glycan = lift("glycan/glycan", "_glycan")
+    ko = lift("ko/ko", "_ko")
+
+    # Everything needed has been lifted; drop the extracted trees, which also
+    # frees the ``reaction``/``compound``/``ko`` names for the flat files.
+    for subdir in ("reaction", "compound", "glycan", "ko"):
+        path = dest / subdir
+        if path.is_dir():
+            shutil.rmtree(path)
+
+    # ``glycan`` is deliberately not required; it is merged only when present.
+    missing = [n for n, p in (("reaction", reaction), ("compound", compound), ("ko", ko)) if p is None]
+    if missing:
+        raise FileNotFoundError(
+            f"KEGG archives did not yield required file(s): {missing}. "
+            f"Check that the source .tar.gz archives are present in {dest}."
+        )
+
+    # Give the parked files their final names.
+    shutil.move(str(reaction), str(dest / "reaction"))
+    shutil.move(str(ko), str(dest / "ko"))
+    if glycan is not None:
+        # The final ``compound`` file is compound followed by glycan.
+        _concat([compound, glycan], dest / "compound")
+        compound.unlink()
+        glycan.unlink()
+    else:
+        shutil.move(str(compound), str(dest / "compound"))
+
+    # Merge whichever proteomes are present (eukaryotes first) into genes.pep.
+    peps = [p for p in (dest / "eukaryotes.pep", dest / "prokaryotes.pep") if p.is_file()]
+    if peps:
+        _concat(peps, dest / "genes.pep")
+        for pep in peps:
+            pep.unlink()
+
+    # Report only what actually exists; optional files may be absent.
+    result: dict[str, Path] = {}
+    for name in (
+        "reaction",
+        "reaction.lst",
+        "reaction_mapformula.lst",
+        "compound",
+        "compound.inchi",
+        "ko",
+        "genes.pep",
+        "taxonomy",
+    ):
+        path = dest / name
+        if path.is_file():
+            result[name] = path
+    return result
+
+
+def download_kegg_dump(
+    dest: str | Path,
+    *,
+    files: tuple[str, ...] = DEFAULT_FILES,
+    base_url: str = BASE_URL,
+    host: str = KEGG_HOST,
+    auth: tuple[str, str] | None = None,
+    netrc_path: str | Path | None = None,
+    force: bool = False,
+    verbose: bool = True,
+) -> dict[str, Path]:
+    """Download a complete KEGG dump into ``dest`` and arrange it in one call.
+
+    Runs :func:`fetch_kegg_files` with the given options, then
+    :func:`extract_kegg_dump` on the same directory. The return value is the
+    latter's logical-name -> path mapping, ready for
+    :func:`raven_python.reconstruction.kegg.parse_kegg_dump`.
+    """
+    fetch_options = {
+        "files": files,
+        "base_url": base_url,
+        "host": host,
+        "auth": auth,
+        "netrc_path": netrc_path,
+        "force": force,
+        "verbose": verbose,
+    }
+    fetch_kegg_files(dest, **fetch_options)
+    if verbose:
+        print(">>> Extracting and arranging KEGG dump...")
+    arranged = extract_kegg_dump(dest)
+    return arranged
diff --git a/src/raven_python/reconstruction/kegg/hmm.py b/src/raven_python/reconstruction/kegg/hmm.py
new file mode 100644
index 0000000..0e210b6
--- /dev/null
+++ b/src/raven_python/reconstruction/kegg/hmm.py
@@ -0,0 +1,453 @@
+"""Build per-KO HMM libraries from KEGG sequences (step 3b.3, maintainer-side).
+
+Ports RAVEN ``constructMultiFasta`` plus the clustering/alignment/training stages
+of ``getKEGGModelForOrganism``. Run once per KEGG release to produce the
+``prok90`` / ``euk90`` HMM libraries that the de-novo query path (3b.5) searches.
+
+Per KO, within one domain (prokaryote / eukaryote):
+
+1. **Multi-FASTA** — gather the member genes' sequences from ``genes.pep``
+   (:func:`build_ko_fastas`).
+2. **CD-HIT** — dereplicate near-identical sequences (default 90 % identity).
+3. **MAFFT** — multiple-sequence alignment (``--anysymbol``; fast FFT-NS-2 by
+   default, PartTree for memory-heavy inputs, ``--auto`` only when ``fast=False``).
+4. **hmmbuild** — train the profile HMM.
+
+Finally the per-KO HMMs are concatenated and ``hmmpress``-ed into a single searchable
+library: a single ``hmmscan`` against the pressed database replaces a per-KO sweep with
+``hmmsearch``.
+
+The pure parts (FASTA indexing/grouping, command construction, CD-HIT ``-n``
+choice) are unit-tested; running the binaries needs HMMER/MAFFT/CD-HIT, located
+via :func:`raven_python.binaries.resolve_binary`.
+"""
+from __future__ import annotations
+
+import functools
+import logging
+import os
+import shutil
+import subprocess
+import tempfile
+import time
+from pathlib import Path
+
+import pandas as pd
+
+from raven_python.binaries import resolve_binary
+from raven_python.reconstruction.kegg.taxonomy import organisms_in_domain
+
+# Module logger: stage progress and memory-budget notices go out at
+# INFO/WARNING, the external tools' own stderr at DEBUG.
+logger = logging.getLogger(__name__)
+
+
+# --------------------------------------------------------------------------- #
+# Step 1 — per-KO multi-FASTA (constructMultiFasta)
+# --------------------------------------------------------------------------- #
+def _full_id(organism: str, gene: str) -> str:
+    """Compose the ``organism:gene`` key under which a gene is looked up in ``genes.pep``."""
+    return "{}:{}".format(organism, gene)
+
+
+def _index_fasta(path: str | Path, wanted: set[str]) -> dict[str, tuple[int, int]]:
+    """Map each wanted record id to its ``(start, end)`` byte span in ``path``.
+
+    The record id is the first whitespace-delimited token of the ``>`` header.
+    One streaming pass; only wanted ids are kept (memory stays small).
+
+    A span runs from the ``>`` of the record's header up to (not including) the
+    next header, or to end of file. When an id occurs more than once the last
+    occurrence wins. A header with no id at all (a bare ``>``) starts an
+    unnamed record: it ends the previous record's span and is itself never
+    indexed, instead of aborting the whole scan with an ``IndexError``.
+
+    Args:
+        path: FASTA file, read in binary mode.
+        wanted: Record ids to index; all other records are skipped.
+
+    Returns:
+        ``{record id: (start, end)}`` for the wanted ids found in the file.
+    """
+    index: dict[str, tuple[int, int]] = {}
+    cur_id: str | None = None
+    cur_start = 0
+    pos = 0  # byte offset of the line currently being examined
+    with open(path, "rb") as handle:
+        for line in handle:
+            if line.startswith(b">"):
+                # A new header closes the record that was open until now.
+                if cur_id is not None and cur_id in wanted:
+                    index[cur_id] = (cur_start, pos)
+                tokens = line[1:].split(None, 1)
+                cur_id = tokens[0].decode() if tokens else None
+                cur_start = pos
+            pos += len(line)
+    # The last record has no following header; it ends at end of file.
+    if cur_id is not None and cur_id in wanted:
+        index[cur_id] = (cur_start, pos)
+    return index
+
+
+def build_ko_fastas(
+    organism_gene_ko: pd.DataFrame,
+    genes_pep: str | Path,
+    out_dir: str | Path,
+    *,
+    organisms: set[str] | None = None,
+) -> dict[str, Path]:
+    """Write one ``<KO>.fa`` per KO holding its member genes' sequences.
+
+    The per-KO multi-FASTA step (RAVEN ``constructMultiFasta``), but with a
+    stdlib byte-offset index of ``genes_pep`` instead of the Java-hashtable
+    byte scan: the proteome is indexed once, then each record is copied out by
+    seeking to its span.
+
+    Args:
+        organism_gene_ko: Table with ``organism``, ``gene`` and ``ko`` columns.
+        genes_pep: Proteome FASTA whose record ids are ``organism:gene``.
+        out_dir: Directory for the ``<KO>.fa`` files; created when missing.
+        organisms: When given, only rows of these organism codes are used (for
+            the prok/euk split).
+
+    Returns:
+        ``{ko: path}`` for the files written. A KO none of whose genes occur in
+        ``genes_pep`` gets no file. Within a file, records are unique and
+        ordered by id.
+    """
+    out_dir = Path(out_dir)
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    if organisms is None:
+        table = organism_gene_ko
+    else:
+        table = organism_gene_ko[organism_gene_ko["organism"].isin(organisms)]
+
+    members: dict[str, list[str]] = {}
+    for organism, gene, ko in zip(table["organism"], table["gene"], table["ko"], strict=True):
+        members.setdefault(ko, []).append(_full_id(organism, gene))
+    wanted = {fid for ids in members.values() for fid in ids}
+
+    spans = _index_fasta(genes_pep, wanted)
+
+    written: dict[str, Path] = {}
+    with open(genes_pep, "rb") as src:
+        for ko, ids in members.items():
+            found = sorted(set(ids) & spans.keys())
+            if found:
+                target = out_dir / f"{ko}.fa"
+                with open(target, "wb") as out:
+                    for fid in found:
+                        begin, stop = spans[fid]
+                        src.seek(begin)
+                        out.write(src.read(stop - begin))
+                written[ko] = target
+    return written
+
+
+# --------------------------------------------------------------------------- #
+# Steps 2-4 — cluster, align, train (one KO)
+# --------------------------------------------------------------------------- #
+def _cdhit_word_size(seq_identity: float) -> str:
+    """Pick the CD-HIT ``-n`` word size that goes with an identity threshold.
+
+    Bands (upper bound inclusive): ``(0.7, 1.0]`` -> ``"5"``, ``(0.6, 0.7]`` ->
+    ``"4"``, ``(0.5, 0.6]`` -> ``"3"``, ``(0.4, 0.5]`` -> ``"2"``.
+
+    Raises:
+        ValueError: ``seq_identity`` lies outside ``(0.4, 1.0]``.
+    """
+    if not 0.4 < seq_identity <= 1.0:
+        raise ValueError("seq_identity must be in (0.4, 1.0] (or -1 to skip CD-HIT).")
+    # Walk the band floors from the top; the first one exceeded decides.
+    for floor, word in ((0.7, "5"), (0.6, "4"), (0.5, "3")):
+        if seq_identity > floor:
+            return word
+    return "2"
+
+
+def _count_sequences(fasta: Path) -> int:
+    """Count the records of ``fasta``, i.e. its lines that start with ``>``."""
+    total = 0
+    with open(fasta, "rb") as stream:
+        for row in stream:
+            if row.startswith(b">"):
+                total += 1
+    return total
+
+
+def _fasta_stats(fasta: Path) -> tuple[int, int]:
+    """Scan ``fasta`` once and return ``(sequence_count, total_residues)``.
+
+    Header lines (starting with ``>``) are counted as sequences; every other
+    line adds its length, surrounding whitespace stripped, to the residues.
+    """
+    headers = 0
+    residues = 0
+    with open(fasta, "rb") as stream:
+        for row in stream:
+            if row[:1] == b">":
+                headers += 1
+                continue
+            residues += len(row.strip())
+    return headers, residues
+
+
+def _cdhit_cmd(cdhit: str, inp: Path, out: Path, seq_identity: float, threads: int) -> list[str]:
+    """Assemble the CD-HIT argument list that clusters ``inp`` into ``out``.
+
+    ``-c`` carries the identity threshold and ``-n`` the word size matching it
+    (:func:`_cdhit_word_size`, which rejects out-of-range thresholds); ``-M`` is
+    pinned to ``2000`` and ``-T`` is the thread count.
+    """
+    options = {
+        "-i": str(inp),
+        "-o": str(out),
+        "-c": str(seq_identity),
+        "-n": _cdhit_word_size(seq_identity),
+        "-M": "2000",
+        "-T": str(threads),
+    }
+    cmd = [cdhit]
+    for flag, value in options.items():
+        cmd += [flag, value]
+    return cmd
+
+
+# MAFFT uses fast progressive FFT-NS-2 until an alignment is large enough to
+# threaten memory, then switches to memory-light PartTree (which keeps all
+# sequences; only the guide tree is approximated).
+#
+# Peak FFT-NS-2 RSS is driven by the progressive-alignment DP work, ~ n_seqs ×
+# (mean length)^2  (equivalently residues^2 / n_seqs) — NOT residue count alone:
+# a few hundred long proteins cost far more than the same residues spread over
+# many short ones. Empirical fit (real KEGG sequences, 12 threads):
+#     RSS_GB ≈ _MAFFT_GB_PER_COST × (n_seqs × mean_len^2)
+# Measured (residues, n_seqs, RSS): 250k/266/0.67, 500k/534/1.25, 1.0M/1066/3.16,
+# 1.5M/1624/5.73, and K12047 941k/452 (mean len 2082) which OOM'd >7 GB — its
+# cost 1.96e9 is the largest of all, hence the length-aware metric.
+#
+# The constants below feed _auto_cost_budget():
+#     budget = _MEMORY_SAFETY × max(total_GB − _MAFFT_MEMORY_OVERHEAD_GB, 0.5)
+#              / _MAFFT_GB_PER_COST
+_MAFFT_GB_PER_COST = 4.2e-9  # GB per unit of (n_seqs × mean_len^2); conservative upper bound
+_MAFFT_MEMORY_OVERHEAD_GB = 2.5  # RAM not for MAFFT (OS + WSL2 + Python); WSL total overcounts
+_MEMORY_SAFETY = 0.65  # fraction of MAFFT's share that is budgeted; leaves headroom, never to the brink
+_DEFAULT_COST_BUDGET = 5e8  # fallback DP-cost budget when total memory can't be detected
+_LOW_MEMORY_BYTES = 16 * 1024**3  # 16 GiB; below this total, warn that the budget is conservative
+
+
+def _total_memory_bytes() -> int | None:
+    """Total physical RAM in bytes, or ``None`` when it cannot be determined.
+
+    Uses ``os.sysconf`` (page count × page size). ``None`` is returned when
+    ``os.sysconf`` does not exist on the platform, when it rejects one of the
+    names, or when it reports a non-positive value: ``sysconf`` answers ``-1``
+    for an indeterminate setting, and multiplying such sentinels would
+    otherwise pass off ``1`` or a negative number as the machine's memory.
+    """
+    try:
+        pages = os.sysconf("SC_PHYS_PAGES")
+        page_size = os.sysconf("SC_PAGE_SIZE")
+    except (AttributeError, ValueError, OSError):
+        return None
+    if pages <= 0 or page_size <= 0:
+        return None
+    return pages * page_size
+
+
+def _alignment_cost(n_seqs: int, residues: int) -> float:
+    """Memory proxy for an FFT-NS-2 alignment: ``residues^2 / n_seqs``.
+
+    That equals ``n_seqs × mean_len^2``. An input without sequences costs ``0.0``.
+    """
+    if not n_seqs:
+        return 0.0
+    return residues * residues / n_seqs
+
+
+@functools.lru_cache(maxsize=1)
+def _auto_cost_budget() -> float:
+    """Largest FFT-NS-2 DP cost (``n_seqs × mean_len^2``) aligned without PartTree.
+
+    The budget follows from the measured memory model: the machine's total RAM,
+    minus a fixed overhead (but never less than 0.5 GB), scaled by the safety
+    fraction and divided by the GB-per-cost coefficient. If total RAM cannot be
+    detected, a fixed default budget is used instead.
+
+    The result is cached, so the computation and its log lines (including the
+    low-memory warning that more KOs will take the approximate PartTree route)
+    happen once per process.
+    """
+    detected = _total_memory_bytes()
+    if detected is not None:
+        total_gb = detected / 1024**3
+        mafft_gb = max(total_gb - _MAFFT_MEMORY_OVERHEAD_GB, 0.5)
+        budget = _MEMORY_SAFETY * mafft_gb / _MAFFT_GB_PER_COST
+        logger.info(
+            "MAFFT DP-cost budget %.2e auto-set from %.1f GB RAM (~%.1f GB for MAFFT)",
+            budget, total_gb, mafft_gb,
+        )
+        if detected < _LOW_MEMORY_BYTES:
+            logger.warning(
+                "Limited memory (%.1f GB total): MAFFT cost budget set conservatively to "
+                "%.2e, so more (especially long-protein) KOs use the approximate PartTree "
+                "alignment. With more RAM, fewer would.", total_gb, budget,
+            )
+        return budget
+
+    logger.warning(
+        "Could not detect system memory; using default MAFFT cost budget %.2e. "
+        "Pass parttree_residues to override.", _DEFAULT_COST_BUDGET,
+    )
+    return _DEFAULT_COST_BUDGET
+
+
+def _mafft_cmd(
+    mafft: str, inp: Path, threads: int, *, fast: bool = True, parttree: bool = False
+) -> list[str]:
+    """Assemble the MAFFT argument list for aligning ``inp``.
+
+    The alignment mode is chosen in this order of precedence:
+
+    * ``parttree`` — ``--retree 2 --parttree``, MAFFT's PartTree approximation
+      for very large inputs;
+    * ``fast`` — ``--retree 2 --maxiterate 0`` (FFT-NS-2), fast progressive
+      alignment, the right trade-off for building profile HMMs;
+    * otherwise ``--auto``, which picks slow iterative refinement on
+      medium/large inputs.
+
+    ``--anysymbol``, the thread count and the input path always follow.
+    """
+    if parttree:
+        mode = ["--retree", "2", "--parttree"]
+    elif fast:
+        mode = ["--retree", "2", "--maxiterate", "0"]
+    else:
+        mode = ["--auto"]
+    return [mafft, *mode, "--anysymbol", "--thread", str(threads), str(inp)]
+
+
+def _hmmbuild_cmd(
+    hmmbuild: str, out_hmm: Path, aligned: Path, threads: int, name: str | None = None
+) -> list[str]:
+    """Assemble the ``hmmbuild`` argument list training ``out_hmm`` from ``aligned``.
+
+    A non-empty ``name`` is passed as ``-n`` so the profile is named after its
+    KO, which makes hmmscan targets KO ids.
+    """
+    naming = ["-n", name] if name else []
+    return [hmmbuild, "--cpu", str(threads), *naming, str(out_hmm), str(aligned)]
+
+
+def _run(cmd: list[str], *, stdout_path: Path | None = None) -> str:
+    """Execute ``cmd`` and hand back what it wrote to stderr.
+
+    With ``stdout_path`` the command's stdout is written to that file;
+    otherwise stdout is captured and discarded.
+
+    Raises:
+        RuntimeError: The command exited non-zero; the message names the
+            executable and carries its stripped stderr.
+    """
+    if stdout_path is None:
+        proc = subprocess.run(cmd, capture_output=True, text=True)
+    else:
+        with open(stdout_path, "w") as sink:
+            proc = subprocess.run(cmd, stdout=sink, stderr=subprocess.PIPE, text=True)
+    captured = proc.stderr or ""
+    if proc.returncode == 0:
+        return captured
+    raise RuntimeError(f"{Path(cmd[0]).name} failed:\n{captured.strip()}")
+
+
+def _staged_run(
+    cmd: list[str], *, label: str, stage: str, verbose: bool,
+    stdout_path: Path | None = None, log: bool = True,
+) -> float:
+    """Run one pipeline stage, time it, and report on it when ``verbose``.
+
+    Reporting, only when ``verbose`` is set:
+
+    * INFO — one ``[KO] stage: done in X.Xs`` line. ``stage`` is expected to
+      name the tool/mode and any seq/res/cost context already, so only the
+      timing is appended. ``log=False`` suppresses this line so that the caller
+      can fold the timing into a message of its own.
+    * DEBUG — the tool's own stderr (MAFFT/CD-HIT/hmmbuild progress), when it
+      is not blank.
+
+    Returns the stage's wall-clock seconds. A failing command raises (see
+    :func:`_run`) before anything is logged.
+    """
+    began = time.perf_counter()
+    tool_stderr = _run(cmd, stdout_path=stdout_path)
+    elapsed = time.perf_counter() - began
+    if not verbose:
+        return elapsed
+    if log:
+        logger.info("[%s] %s: done in %.1fs", label, stage, elapsed)
+    chatter = tool_stderr.strip()
+    if chatter:
+        logger.debug("[%s] %s output:\n%s", label, stage, chatter)
+    return elapsed
+
+
+def build_ko_hmm(
+    ko_fasta: str | Path,
+    out_hmm: str | Path,
+    *,
+    seq_identity: float = 0.9,
+    parttree_residues: int | None = None,
+    threads: int = 1,
+    fast: bool = True,
+    verbose: bool = False,
+    cdhit: str | Path | None = None,
+    mafft: str | Path | None = None,
+    hmmbuild: str | Path | None = None,
+) -> Path:
+    """Cluster, align and train a profile HMM for one KO's multi-FASTA.
+
+    Single-sequence KOs skip CD-HIT/MAFFT (a lone sequence is its own alignment).
+    ``seq_identity=-1`` skips CD-HIT. All (deduplicated) sequences are kept —
+    memory on large KOs is bounded by switching MAFFT to PartTree, not by
+    dropping sequences. ``fast`` uses MAFFT FFT-NS-2 (fast progressive) rather
+    than ``--auto``'s slow iterative refinement. MAFFT switches to memory-light
+    PartTree once an alignment is predicted to be too memory-heavy: by default from
+    its **DP cost** (``n_seqs × mean_len²`` — long proteins cost far more than the
+    same residue count in short ones) against a RAM-derived budget
+    (:func:`_auto_cost_budget`). Passing ``parttree_residues`` overrides this with a
+    simple residue-count cutoff.
+    ``verbose`` logs (via the ``logging`` module, INFO/DEBUG) which tool is running
+    for this KO, sequence counts at each stage, timings, and the tools' own
+    output. Returns ``out_hmm``.
+
+    Args:
+        ko_fasta: Multi-FASTA with the KO's member sequences.
+        out_hmm: Profile to write. Its stem is used both as the log label and
+            as the profile name given to ``hmmbuild``; parent directories are
+            created as needed.
+        seq_identity: CD-HIT identity threshold, or ``-1`` to skip CD-HIT.
+        parttree_residues: Residue-count cutoff above which PartTree is used;
+            ``None`` selects the RAM-derived DP-cost budget instead.
+        threads: Thread count handed to CD-HIT, MAFFT and hmmbuild.
+        fast: Use FFT-NS-2 instead of ``--auto`` when PartTree is not chosen.
+        verbose: Emit the progress log lines.
+        cdhit, mafft, hmmbuild: Optional explicit binaries, passed on to
+            ``resolve_binary``. CD-HIT and MAFFT are resolved only when their
+            stage actually runs.
+
+    Raises:
+        ValueError: ``ko_fasta`` holds no sequence, or ``seq_identity`` is
+            outside the range CD-HIT's word-size choice accepts.
+        RuntimeError: One of the external tools exited non-zero.
+    """
+    ko_fasta = Path(ko_fasta)
+    out_hmm = Path(out_hmm)
+    label = out_hmm.stem
+    out_hmm.parent.mkdir(parents=True, exist_ok=True)
+    n = _count_sequences(ko_fasta)
+    if n == 0:
+        raise ValueError(f"{ko_fasta} contains no sequences.")
+    if verbose:
+        logger.info("[%s] start: %d sequences", label, n)
+
+    # Resolved before any work is done, since every path ends in hmmbuild.
+    hmmbuild = resolve_binary("hmmbuild", binary=hmmbuild)
+    # Intermediate files live in a scratch directory that is removed on exit;
+    # hmmbuild therefore has to run inside this ``with`` block.
+    with tempfile.TemporaryDirectory() as tmp:
+        tmp = Path(tmp)
+        if n == 1:
+            if verbose:
+                logger.info("[%s] single sequence: skipping CD-HIT/MAFFT", label)
+            aligned = ko_fasta  # trivially aligned
+        else:
+            # Without CD-HIT the raw multi-FASTA goes straight to alignment.
+            clustered = ko_fasta
+            cdhit_elapsed: float | None = None
+            if seq_identity != -1:
+                clustered = tmp / "clustered.fa"
+                # log=False: the timing is reported below, together with the
+                # before/after sequence counts, in a single line.
+                cdhit_elapsed = _staged_run(
+                    _cdhit_cmd(
+                        resolve_binary("cd-hit", binary=cdhit), ko_fasta, clustered,
+                        seq_identity, threads,
+                    ),
+                    label=label, stage=f"CD-HIT ({seq_identity})", verbose=verbose, log=False,
+                )
+            n_clustered, residues = _fasta_stats(clustered)
+            if verbose and cdhit_elapsed is not None:
+                logger.info(
+                    "[%s] CD-HIT (%s): %d -> %d sequences in %.1fs",
+                    label, seq_identity, n, n_clustered, cdhit_elapsed,
+                )
+            aligned = tmp / "aligned.fa"
+            if n_clustered == 1:
+                if verbose:
+                    logger.info("[%s] one sequence after CD-HIT: skipping MAFFT", label)
+                shutil.copyfile(clustered, aligned)  # MAFFT can't align a single seq
+            else:
+                # PartTree once the alignment is too memory-heavy. Default: its DP
+                # cost (n_seqs × mean_len^2) vs a RAM-derived budget — length-aware,
+                # so long-protein KOs (few seqs, huge residues) route correctly.
+                # parttree_residues, if given, overrides with a residue-count cutoff.
+                cost = _alignment_cost(n_clustered, residues)
+                if parttree_residues is None:
+                    parttree = cost > _auto_cost_budget()
+                else:
+                    parttree = residues > parttree_residues
+                # MAFFT writes the alignment to stdout, captured into ``aligned``.
+                _staged_run(
+                    _mafft_cmd(
+                        resolve_binary("mafft", binary=mafft), clustered, threads,
+                        fast=fast, parttree=parttree,
+                    ),
+                    label=label,
+                    stage=f"MAFFT {'PartTree' if parttree else 'FFT-NS-2' if fast else 'auto'} "
+                    f"({n_clustered} seqs, {residues} res, cost {cost:.2e})",
+                    verbose=verbose,
+                    stdout_path=aligned,
+                )
+        _staged_run(
+            _hmmbuild_cmd(hmmbuild, out_hmm, aligned, threads, name=label),
+            label=label, stage="hmmbuild", verbose=verbose,
+        )
+    if verbose:
+        logger.info("[%s] complete -> %s", label, out_hmm)
+    return out_hmm
+
+
+# --------------------------------------------------------------------------- #
+# Orchestration — a full domain library
+# --------------------------------------------------------------------------- #
+def build_hmm_library(
+    organism_gene_ko: pd.DataFrame,
+    genes_pep: str | Path,
+    taxonomy: str | Path,
+    out_dir: str | Path,
+    *,
+    domain: str,
+    seq_identity: float = 0.9,
+    parttree_residues: int | None = None,
+    threads: int = 1,
+    fast: bool = True,
+    verbose: bool = False,
+    press: bool = True,
+    cdhit: str | Path | None = None,
+    mafft: str | Path | None = None,
+    hmmbuild: str | Path | None = None,
+    hmmpress: str | Path | None = None,
+) -> dict[str, Path | list[Path]]:
+    """Build a domain (``"prokaryotes"``/``"eukaryotes"``) HMM library.
+
+    Restricts genes to the domain's organisms (from ``taxonomy``), builds a
+    multi-FASTA and a profile HMM per KO under ``out_dir``, and (if ``press``)
+    concatenates them into ``out_dir/library.hmm`` and ``hmmpress``-es it for fast
+    ``hmmscan`` querying. Returns ``{"hmms": [...], "library": path | None}``.
+
+    Heavy and binary-dependent — intended for the maintainer, run once per KEGG
+    release. Skips KOs that already have an ``.hmm`` (resumable).
+
+    Because an existing ``.hmm`` is trusted on resume, a profile whose build
+    fails or is interrupted is deleted before the error propagates. Otherwise
+    the next run would skip that KO and concatenate a partial profile into the
+    library.
+
+    Args:
+        organism_gene_ko: Table with ``organism``, ``gene`` and ``ko`` columns.
+        genes_pep: Proteome FASTA the sequences are taken from.
+        taxonomy: KEGG taxonomy file used to list the domain's organisms.
+        out_dir: Output root; gets ``fasta/``, ``hmms/`` and ``library.hmm``.
+        domain: Domain whose organisms are used.
+        seq_identity, parttree_residues, threads, fast, verbose: Passed through
+            to :func:`build_ko_hmm` for every KO.
+        press: Concatenate the profiles and run ``hmmpress`` on the result.
+        cdhit, mafft, hmmbuild, hmmpress: Optional explicit binaries.
+
+    Returns:
+        ``{"hmms": [per-KO profile paths], "library": path}``, with ``library``
+        being ``None`` when ``press`` is off or no profile exists.
+
+    Raises:
+        ValueError: ``taxonomy`` lists no organism for ``domain``.
+        RuntimeError: An external tool exited non-zero.
+    """
+    out_dir = Path(out_dir)
+    fasta_dir = out_dir / "fasta"
+    hmm_dir = out_dir / "hmms"
+    hmm_dir.mkdir(parents=True, exist_ok=True)
+
+    organisms = organisms_in_domain(taxonomy, domain)
+    if not organisms:
+        raise ValueError(f"No organisms found for domain {domain!r} in {taxonomy}.")
+
+    ko_fastas = build_ko_fastas(organism_gene_ko, genes_pep, fasta_dir, organisms=organisms)
+
+    hmms: list[Path] = []
+    for ko, fasta in ko_fastas.items():
+        out_hmm = hmm_dir / f"{ko}.hmm"
+        if not out_hmm.exists():
+            try:
+                build_ko_hmm(
+                    fasta, out_hmm, seq_identity=seq_identity,
+                    parttree_residues=parttree_residues, threads=threads, fast=fast,
+                    verbose=verbose, cdhit=cdhit, mafft=mafft, hmmbuild=hmmbuild,
+                )
+            except BaseException:
+                # Also covers Ctrl-C. Whatever hmmbuild managed to write is not
+                # a finished profile, and the exists() check above would trust it.
+                out_hmm.unlink(missing_ok=True)
+                raise
+        hmms.append(out_hmm)
+
+    library: Path | None = None
+    if press and hmms:
+        library = out_dir / "library.hmm"
+        # Concatenate in sorted path order so the library is reproducible.
+        with open(library, "wb") as out:
+            for hmm in sorted(hmms):
+                with open(hmm, "rb") as fh:
+                    shutil.copyfileobj(fh, out)
+        # -f: overwrite the index files of an earlier press of this library.
+        _run([resolve_binary("hmmpress", binary=hmmpress), "-f", str(library)])
+
+    return {"hmms": hmms, "library": library}
diff --git a/src/raven_python/reconstruction/kegg/organism.py b/src/raven_python/reconstruction/kegg/organism.py
new file mode 100644
index 0000000..9f30575
--- /dev/null
+++ b/src/raven_python/reconstruction/kegg/organism.py
@@ -0,0 +1,153 @@
+"""Build a draft model for a KEGG species from the reference artefacts (step 3b.4).
+
+Ports the **organism-ID** path of RAVEN ``getKEGGModelForOrganism`` (the branch
+taken when no FASTA file is given). For an organism already annotated in KEGG it
+needs no homology search: take the organism's gene↔KO assignments, map KO→reaction
+against the gene-free reference model, OR-join the organism's genes into each
+reaction's GPR, and keep the reactions that end up with genes (plus spontaneous
+reactions, optionally). The HMM/FASTA path is step 3b.5 (:mod:`.query`).
+
+Consumes the 3b.2 artefacts: the gene-free reference ``cobra.Model`` plus the
+``ko_reaction``, ``organism_gene_ko`` and ``rxn_flags`` tables. The KO→reaction
+mapping is taken from the ``ko_reaction`` table (a lossless published artefact)
+rather than from the reference model's annotations, so it does not depend on KEGG
+annotations surviving an SBML round-trip.
+
+Domain mode (``organism_id`` = ``"eukaryotes"``/``"prokaryotes"``) keeps the genes
+of every organism in that domain; it needs the KEGG ``taxonomy`` file. Unlike
+RAVEN, this uses the domain classification directly rather than the full
+``getPhylDist`` distance matrix — the matrix existed for per-organism HMM
+subsampling, which our fixed prok90/euk90 libraries (3b.3) make unnecessary.
+"""
+from __future__ import annotations
+
+from pathlib import Path
+
+import cobra
+import pandas as pd
+
+from raven_python.io.yaml import read_yaml_model
+from raven_python.reconstruction.kegg.assemble import _DOMAINS, assemble_model_from_ko_genes
+from raven_python.reconstruction.kegg.parse import read_kegg_table
+from raven_python.reconstruction.kegg.taxonomy import organisms_in_domain
+
+_NOTE = "Included by get_kegg_model_for_organism (no HMMs)"
+
+
+def get_kegg_model_for_organism(
+    organism_id: str,
+    reference_model: cobra.Model,
+    ko_reaction: pd.DataFrame,
+    organism_gene_ko: pd.DataFrame,
+    *,
+    rxn_flags: pd.DataFrame | None = None,
+    taxonomy: str | Path | None = None,
+    keep_spontaneous: bool = True,
+    keep_undefined_stoich: bool = True,
+    keep_incomplete: bool = True,
+    keep_general: bool = False,
+) -> cobra.Model:
+    """Draft a model for one KEGG organism (or a whole domain) from KO annotations.
+
+    The organism's rows of ``organism_gene_ko`` are grouped into a
+    ``{ko: [gene, ...]}`` mapping, which is handed to
+    :func:`assemble_model_from_ko_genes` together with the reference model and
+    the remaining tables. Every gene of the assembled model then receives a
+    ``kegg.genes`` annotation.
+
+    Parameters
+    ----------
+    organism_id
+        KEGG organism code (e.g. ``"eco"``), or one of the domain names in
+        ``_DOMAINS`` for a whole-domain model (which requires ``taxonomy``).
+        Compared case-insensitively against the ``organism`` column.
+    reference_model
+        The gene-free KEGG reference model; forwarded to the assembler.
+    ko_reaction, organism_gene_ko, rxn_flags
+        The relational KEGG tables. Only the ``organism``, ``gene`` and ``ko``
+        columns of ``organism_gene_ko`` are read here; the other tables are
+        forwarded unchanged.
+    taxonomy
+        Path to the KEGG ``taxonomy`` file; only used in domain mode.
+    keep_spontaneous, keep_undefined_stoich, keep_incomplete, keep_general
+        Quality-filter switches (RAVEN's ``keep*``), forwarded unchanged to
+        :func:`assemble_model_from_ko_genes`.
+
+    Returns
+    -------
+    cobra.Model
+        The model produced by the assembler, with ``kegg.genes`` set on every
+        gene (``organism:gene``).
+
+    Raises
+    ------
+    ValueError
+        In domain mode without ``taxonomy``, or in species mode when the
+        organism has no rows in ``organism_gene_ko``.
+    """
+    org = organism_id.lower()
+    domain_mode = org in _DOMAINS
+
+    if domain_mode:
+        if taxonomy is None:
+            raise ValueError(
+                f"Domain mode ({organism_id!r}) needs the KEGG taxonomy file; "
+                "pass taxonomy=."
+            )
+        members = organisms_in_domain(taxonomy, org)
+        selected = organism_gene_ko["organism"].str.lower().isin(members)
+    else:
+        # Lower-case the (potentially very long) column once and reuse it for
+        # both the existence check and the row mask.
+        lowered = organism_gene_ko["organism"].str.lower()
+        if org not in set(lowered):
+            raise ValueError(
+                f"Organism '{organism_id}' has no genes in organism_gene_ko. "
+                f"Provide a KEGG species code present in the table."
+            )
+        selected = lowered == org
+    rows = organism_gene_ko[selected]
+
+    ko_to_genes: dict[str, list[str]] = {}
+    for org_code, gene, ko in zip(rows["organism"], rows["gene"], rows["ko"], strict=True):
+        # Bare gene ids can collide between organisms, so domain mode prefixes
+        # each one with its (lower-cased) organism code.
+        qualified = f"{org_code.lower()}:{gene}" if domain_mode else gene
+        if ko in ko_to_genes:
+            ko_to_genes[ko].append(qualified)
+        else:
+            ko_to_genes[ko] = [qualified]
+
+    model, _ = assemble_model_from_ko_genes(
+        reference_model,
+        ko_reaction,
+        ko_to_genes,
+        rxn_flags=rxn_flags,
+        keep_spontaneous=keep_spontaneous,
+        keep_undefined_stoich=keep_undefined_stoich,
+        keep_incomplete=keep_incomplete,
+        keep_general=keep_general,
+        model_id=organism_id,
+        model_name=f"Generated by get_kegg_model_for_organism for {organism_id}",
+        note=_NOTE,
+    )
+
+    for gene in model.genes:
+        # Ids that already contain ':' (domain mode) are used as-is; bare ids
+        # (species mode) get the organism code prepended.
+        if ":" in gene.id:
+            gene.annotation["kegg.genes"] = gene.id
+        else:
+            gene.annotation["kegg.genes"] = f"{org}:{gene.id}"
+    return model
+
+
+def get_kegg_model_for_organism_from_artefacts(
+    organism_id: str,
+    artefact_dir: str | Path | None = None,
+    *,
+    version: str | None = None,
+    **kwargs,
+) -> cobra.Model:
+    """Load the published 3b.2 artefacts from ``artefact_dir`` and build the model.
+
+    Reads ``reference_model.yml.gz``, the gzipped ``ko_reaction`` and
+    ``rxn_flags`` tables and the ``organism_gene_ko`` table, then calls
+    :func:`get_kegg_model_for_organism`.
+
+    ``organism_gene_ko`` is looked up as ``organism_gene_ko.tsv.xz`` first; when
+    that file is absent but ``organism_gene_ko.tsv.gz`` exists (the name
+    ``write_kegg_tables`` uses for every table it is given), the gzipped file is
+    read instead.
+
+    Parameters
+    ----------
+    organism_id
+        Forwarded to :func:`get_kegg_model_for_organism`.
+    artefact_dir
+        Directory holding the artefacts. If ``None`` the directory is obtained
+        from :func:`raven_python.data.ensure_kegg_data`.
+    version
+        Passed to ``ensure_kegg_data`` when ``artefact_dir`` is ``None``;
+        otherwise unused.
+    **kwargs
+        Extra keyword arguments forwarded to
+        :func:`get_kegg_model_for_organism` (e.g. ``taxonomy``, ``keep_*``).
+
+    Returns
+    -------
+    cobra.Model
+        The model returned by :func:`get_kegg_model_for_organism`.
+    """
+    if artefact_dir is None:
+        # Imported lazily so the data-fetching module is only needed when the
+        # caller does not supply a local artefact directory.
+        from raven_python.data import ensure_kegg_data
+
+        artefact_dir = ensure_kegg_data(version=version)
+    artefact_dir = Path(artefact_dir)
+
+    ogk_path = artefact_dir / "organism_gene_ko.tsv.xz"
+    if not ogk_path.exists():
+        # Fall back to the gzipped spelling; if neither file exists the xz path
+        # is kept so the resulting error names the canonical artefact.
+        gz_path = artefact_dir / "organism_gene_ko.tsv.gz"
+        if gz_path.exists():
+            ogk_path = gz_path
+
+    reference_model = read_yaml_model(artefact_dir / "reference_model.yml.gz")
+    ko_reaction = read_kegg_table(artefact_dir / "ko_reaction.tsv.gz")
+    organism_gene_ko = read_kegg_table(ogk_path)
+    rxn_flags = read_kegg_table(artefact_dir / "rxn_flags.tsv.gz")
+    return get_kegg_model_for_organism(
+        organism_id,
+        reference_model,
+        ko_reaction,
+        organism_gene_ko,
+        rxn_flags=rxn_flags,
+        **kwargs,
+    )
diff --git a/src/raven_python/reconstruction/kegg/parse.py b/src/raven_python/reconstruction/kegg/parse.py
new file mode 100644
index 0000000..3ecd6f4
--- /dev/null
+++ b/src/raven_python/reconstruction/kegg/parse.py
@@ -0,0 +1,578 @@
+"""Parse a local KEGG flat-file dump into a reference model + relational tables.
+
+Maintainer-side, build-time tooling. Produces the published raven_python KEGG artefacts:
+
+* a **gene-free reference GEM** (reactions + metabolites only) as a ``cobra.Model``;
+* minimal **relational tables** (``pandas.DataFrame``) written as compressed TSV —
+  ``ko_reaction``, ``ko_names`` and ``rxn_flags`` (spontaneous / undefined-stoich /
+  incomplete / general) gzipped, and ``organism_gene_ko`` (the large one)
+  xz-compressed.
+
+Genes live only in ``organism_gene_ko``; per-organism GPRs are built at runtime
+(3b.4/3b.5), so the reference model stays small.
+
+Improvements over the RAVEN port (logged in IMPROVEMENTS.md):
+
+* **K1** — equations are read from each reaction entry's own ``EQUATION`` field,
+  dropping RAVEN's fragile dependence on ``reaction.lst`` being in the exact same
+  line order as ``reaction``.
+* **K2** — undefined-stoichiometry terms (``n C00001``, ``(n+1) C00002``) keep
+  their real compound id with coefficient 1 and the reaction is *flagged*, rather
+  than minting ``"n C00001"`` pseudo-metabolites and renaming them ``undefined_N``.
+* **K3** — quality labels become a tidy boolean ``rxn_flags`` table instead of
+  free-text appended to ``rxnNotes``.
+
+The KEGG flat-file format: each entry is a block of lines terminated by ``///``;
+a field label occupies columns 1-12, continuation lines are indented 12 spaces.
+"""
+from __future__ import annotations
+
+import gzip
+import heapq
+import lzma
+import re
+import tempfile
+from collections.abc import Iterator
+from dataclasses import dataclass, field
+from pathlib import Path
+
+import cobra
+import pandas as pd
+
+from raven_python.io.yaml import write_yaml_model
+
+# A KEGG entry id is the first token after the 12-char ENTRY label (6 chars:
+# R00010, C00001, K01194, ...).
+_ID_LEN = 6
+_LABEL_WIDTH = 12
+
+# Compound token inside an equation, optionally a glycan (G) or drug (D); we also
+# tolerate trailing polymer suffixes like "C00404(n)" by matching the stem.
+_MET_TOKEN = re.compile(r"^([CGD]\d{5})")
+_NUMERIC = re.compile(r"^\d+(\.\d+)?$")
+
+
+# --------------------------------------------------------------------------- #
+# Generic flat-file reader
+# --------------------------------------------------------------------------- #
+def _iter_entries(path: str | Path) -> Iterator[dict[str, list[str]]]:
+    """Stream a KEGG flat file as one ``{field_label: [value_lines]}`` dict per entry.
+
+    An entry ends at a line starting with ``///``. Within an entry, a line whose
+    first ``_LABEL_WIDTH`` columns contain text opens (or re-opens) that field;
+    a line whose label columns are blank is a continuation of the most recent
+    field. Blank lines are ignored, as are continuation lines that appear before
+    any label. A trailing entry without a closing ``///`` is still yielded.
+    """
+    fields: dict[str, list[str]] = {}
+    active: str | None = None
+    with open(path, encoding="utf-8") as stream:
+        for text in (raw.rstrip("\n") for raw in stream):
+            if text.startswith("///"):
+                # Entry terminator: emit what was collected (if anything) and
+                # start over with a fresh dict.
+                if fields:
+                    yield fields
+                fields = {}
+                active = None
+            elif text.strip():
+                head = text[:_LABEL_WIDTH].strip()
+                body = text[_LABEL_WIDTH:].rstrip()
+                if head:
+                    active = head
+                if active is not None:
+                    fields.setdefault(active, []).append(body)
+    if fields:  # file ended without a final '///'
+        yield fields
+
+
+# --------------------------------------------------------------------------- #
+# Reactions
+# --------------------------------------------------------------------------- #
+@dataclass
+class KeggReaction:
+    """A reaction parsed from the KEGG ``reaction`` flat file."""
+
+    # Entry id taken from the start of the ENTRY field (e.g. ``R00010``).
+    id: str
+    # First NAME line with a trailing ';' removed.
+    name: str = ""
+    # EQUATION value lines joined with single spaces.
+    equation: str = ""
+    # True when the equation uses ``<=>``; may be set to False afterwards from
+    # ``reaction_mapformula.lst``.
+    reversible: bool = True
+    # Whitespace-separated tokens of the ENZYME field.
+    eccodes: list[str] = field(default_factory=list)
+    # Leading id of each ORTHOLOGY line.
+    kos: list[str] = field(default_factory=list)
+    # Leading id of each PATHWAY line, excluding ``rn011*``/``rn012*`` maps.
+    pathways: list[str] = field(default_factory=list)
+    # The three flags below are derived from keywords in the COMMENT field.
+    spontaneous: bool = False
+    incomplete: bool = False
+    general: bool = False
+    # Set by ``_parse_equation`` for symbolic coefficients or unparseable terms.
+    undefined_stoich: bool = False
+    # Cached stoichiometry from ``_parse_equation(equation)``: populated by
+    # :func:`parse_kegg_reactions` so :func:`build_reference_model` reuses the
+    # parse instead of repeating it (KEGG has ~12k reactions; a full redundant
+    # parse cost a noticeable chunk of the build).
+    stoichiometry: dict[str, float] = field(default_factory=dict)
+
+
+def _first_id(lines: list[str]) -> str:
+    """Return the entry id at the start of the first line, or ``""`` if there is none.
+
+    The id is the first ``_ID_LEN`` characters of ``lines[0]``, stripped of
+    surrounding whitespace.
+    """
+    if not lines:
+        return ""
+    head = lines[0]
+    return head[:_ID_LEN].strip()
+
+
+def _comment_flags(rxn: KeggReaction, comment: str) -> None:
+    """Set the quality flags on ``rxn`` from keywords in its COMMENT text.
+
+    Matching is case-insensitive. All three flags are overwritten:
+    ``spontaneous`` (``SPONTANEOUS``), ``incomplete`` (any of ``INCOMPLETE``,
+    ``ERRONEOUS``, ``UNCLEAR``) and ``general`` (``GENERAL REACTION``).
+    """
+    upper = comment.upper()
+    incomplete_markers = ("INCOMPLETE", "ERRONEOUS", "UNCLEAR")
+    rxn.spontaneous = "SPONTANEOUS" in upper
+    rxn.incomplete = any(marker in upper for marker in incomplete_markers)
+    rxn.general = "GENERAL REACTION" in upper
+
+
+def _parse_equation(equation: str) -> tuple[dict[str, float], bool, bool]:
+    """Turn a KEGG equation string into ``({met_id: coef}, reversible, undefined_stoich)``.
+
+    The equation is split once at the first ``<=>``, ``=>`` or ``<=`` arrow
+    (surrounded by whitespace); terms on each side are separated by ``" + "``.
+    Left-hand terms contribute negative coefficients, right-hand terms positive
+    ones, and contributions to the same metabolite are summed. Metabolites whose
+    net coefficient is exactly zero are removed.
+
+    A term's last token is the compound; a leading numeric token is its
+    coefficient. Any other leading token (``n``, ``(n+1)``, ``2n``) counts as
+    1.0 and marks the stoichiometry as undefined (improvement K2). A compound
+    token that does not start with a KEGG C/G/D id is kept verbatim and also
+    marks the stoichiometry as undefined.
+
+    ``reversible`` is true only when the equation contains ``<=>``.
+    """
+    sides = re.split(r"\s(?:<=>|=>|<=)\s", equation, maxsplit=1)
+    sides += ["", ""]  # pad so a missing right-hand side becomes ""
+    substrates, products = sides[0], sides[1]
+
+    totals: dict[str, float] = {}
+    symbolic = False
+    for text, direction in ((substrates, -1.0), (products, 1.0)):
+        for chunk in text.split(" + "):
+            chunk = chunk.strip()
+            if not chunk:
+                continue
+            *leading, compound = chunk.split()
+            amount = 1.0
+            if leading:
+                if _NUMERIC.match(leading[0]):
+                    amount = float(leading[0])
+                else:  # symbolic coefficient such as 'n' or '(n+1)'
+                    symbolic = True
+            hit = _MET_TOKEN.match(compound)
+            if hit is None:  # not a recognisable compound id: keep raw, flag
+                symbolic = True
+                key = compound
+            else:
+                key = hit.group(1)
+            totals[key] = totals.get(key, 0.0) + direction * amount
+
+    # A compound present equally on both sides nets to zero and is dropped.
+    balanced = {met: coef for met, coef in totals.items() if coef != 0.0}
+    return balanced, "<=>" in equation, symbolic
+
+
+def parse_kegg_reactions(kegg_dir: str | Path) -> list[KeggReaction]:
+    """Read ``<kegg_dir>/reaction`` and return one :class:`KeggReaction` per entry.
+
+    Entries without an id are skipped. For each remaining entry the NAME,
+    COMMENT, ENZYME, ORTHOLOGY, PATHWAY and EQUATION fields are interpreted;
+    the parsed stoichiometry is cached on the record. Pathway ids starting with
+    ``rn011``/``rn012`` (global/overview maps) are left out.
+
+    Reversibility comes from the equation arrow and is then overridden to
+    irreversible for every id returned by :func:`_irreversible_from_mapformula`
+    (which yields nothing when ``reaction_mapformula.lst`` is absent).
+    """
+    root = Path(kegg_dir)
+    parsed: list[KeggReaction] = []
+
+    for entry in _iter_entries(root / "reaction"):
+        rid = _first_id(entry.get("ENTRY", []))
+        if not rid:
+            continue
+        rxn = KeggReaction(id=rid)
+
+        names = entry.get("NAME")
+        if names:
+            rxn.name = names[0].rstrip(";").strip()
+
+        comments = entry.get("COMMENT")
+        if comments:
+            _comment_flags(rxn, " ".join(comments))
+
+        enzymes = entry.get("ENZYME")
+        if enzymes:
+            rxn.eccodes = [ec for line in enzymes for ec in line.split()]
+
+        rxn.kos = [line[:_ID_LEN].strip() for line in entry.get("ORTHOLOGY", [])]
+
+        pathway_ids = (line[:7].strip() for line in entry.get("PATHWAY", []))
+        rxn.pathways.extend(
+            pid
+            for pid in pathway_ids
+            if pid and not pid.startswith(("rn011", "rn012"))  # skip global/overview
+        )
+
+        equation_lines = entry.get("EQUATION")
+        if equation_lines:
+            rxn.equation = " ".join(part.strip() for part in equation_lines)
+            stoich, reversible, undefined = _parse_equation(rxn.equation)
+            rxn.reversible = reversible
+            rxn.undefined_stoich = undefined
+            rxn.stoichiometry = stoich  # cached for build_reference_model
+
+        parsed.append(rxn)
+
+    irreversible = _irreversible_from_mapformula(root / "reaction_mapformula.lst")
+    for rxn in parsed:
+        if rxn.id in irreversible:
+            rxn.reversible = False
+    return parsed
+
+
+def _irreversible_from_mapformula(path: str | Path) -> set[str]:
+    """Collect reaction ids that ``reaction_mapformula.lst`` never shows as reversible.
+
+    Lines of that file look like ``R00005: 00330: C01010 => C00011``. An id is
+    returned only when (a) no line lists it with ``<=>`` and (b) every
+    irreversible line for it reports the same product token as the first one
+    seen; differing product tokens are taken to mean the reaction is drawn in
+    both directions across maps, so it stays reversible.
+
+    Direction (substrate/product order) is not propagated back into the model
+    stoichiometry — a documented simplification of RAVEN's column-flipping
+    logic, which only affects the small set of map-directional reactions.
+
+    Returns an empty set when ``path`` is not an existing file.
+    """
+    mapformula = Path(path)
+    if not mapformula.is_file():
+        return set()
+
+    reversible_ids: set[str] = set()
+    first_product: dict[str, str] = {}
+    mixed_direction: set[str] = set()
+
+    for rid, is_reversible, product in _iter_mapformula_lines(mapformula):
+        if is_reversible:
+            reversible_ids.add(rid)
+            continue
+        if rid not in first_product:
+            first_product[rid] = product
+        elif first_product[rid] != product:
+            # Same reaction, different product on another map.
+            mixed_direction.add(rid)
+
+    return set(first_product) - reversible_ids - mixed_direction
+
+
+def _iter_mapformula_lines(path: Path) -> Iterator[tuple[str, bool, str]]:
+    """Yield ``(reaction_id, reversible, product)`` per ``reaction_mapformula.lst`` line.
+
+    Lines look like ``R00005: 00330: C01010 => C00011``; blank lines and lines
+    without a ``:`` are skipped.
+
+    * ``reaction_id`` is the first ``_ID_LEN`` characters of the line.
+    * ``reversible`` is true when the line contains ``<=>``.
+    * ``product`` is a single token taken from the product side of the arrow:
+      the last token of the line for ``=>`` (and for reversible lines, where
+      the caller ignores it), but the last token *left* of the arrow for a
+      left-pointing ``<=`` line. Without that distinction ``A => B`` and
+      ``B <= A`` (same direction) would look conflicting to the caller, while
+      ``A => B`` and ``A <= B`` (opposite directions) would look consistent.
+    """
+    with open(path, encoding="utf-8") as handle:
+        for raw in handle:
+            line = raw.strip()
+            if not line or ":" not in line:
+                continue
+            rid = line[:_ID_LEN]
+            reversible = "<=>" in line
+            product = line.split()[-1]
+            if not reversible:
+                # Only look at the equation part (after the id and map fields)
+                # so the prefix can never be mistaken for a compound.
+                equation = line.split(":", 2)[-1]
+                if " <= " in equation:
+                    left_side = equation.split(" <= ", 1)[0].split()
+                    if left_side:  # keep the fallback if nothing precedes the arrow
+                        product = left_side[-1]
+            yield rid, reversible, product
+
+
+# --------------------------------------------------------------------------- #
+# Compounds
+# --------------------------------------------------------------------------- #
+@dataclass
+class KeggCompound:
+    """A metabolite parsed from the KEGG ``compound`` flat file."""
+
+    # Entry id taken from the start of the ENTRY field (e.g. ``C00001``).
+    id: str
+    # First synonym on the first NAME line (text before the first ';').
+    name: str = ""
+    # First FORMULA line; cleared by parse_kegg_compounds when an InChI exists.
+    formula: str = ""
+    # InChI string from ``compound.inchi``, if that file lists this compound.
+    inchi: str = ""
+    # ChEBI accessions from DBLINKS, each stored with a ``CHEBI:`` prefix.
+    chebi: list[str] = field(default_factory=list)
+    # PubChem ids from DBLINKS, stored as written.
+    pubchem: list[str] = field(default_factory=list)
+
+
+def parse_kegg_compounds(kegg_dir: str | Path) -> list[KeggCompound]:
+    """Read ``<kegg_dir>/compound`` into :class:`KeggCompound` records.
+
+    Entries without an id are skipped. The name is the first synonym of the
+    first NAME line, the formula the first FORMULA line, and ChEBI / PubChem
+    cross-references come from the DBLINKS lines. If ``compound.inchi`` exists,
+    compounds listed there get their InChI and have their formula blanked.
+    """
+    root = Path(kegg_dir)
+    records: list[KeggCompound] = []
+
+    for entry in _iter_entries(root / "compound"):
+        cid = _first_id(entry.get("ENTRY", []))
+        if not cid:
+            continue
+        record = KeggCompound(id=cid)
+
+        names = entry.get("NAME")
+        if names:
+            # KEGG separates synonyms with ';' — keep only the first.
+            record.name = names[0].split(";")[0].strip()
+
+        formulas = entry.get("FORMULA")
+        if formulas:
+            record.formula = formulas[0].strip()
+
+        for link in entry.get("DBLINKS", []):
+            if link.startswith("ChEBI:"):
+                accessions = link.split(":", 1)[1].split()
+                record.chebi.extend(f"CHEBI:{acc}" for acc in accessions)
+            elif link.startswith("PubChem:"):
+                record.pubchem.extend(link.split(":", 1)[1].split())
+
+        records.append(record)
+
+    inchi_by_id = _parse_inchis(root / "compound.inchi")
+    for record in records:
+        inchi = inchi_by_id.get(record.id)
+        if inchi is not None:
+            record.inchi = inchi
+            record.formula = ""  # prefer the InChI; matches RAVEN
+    return records
+
+
+def _parse_inchis(path: str | Path) -> dict[str, str]:
+    """Read a tab-separated ``compound_id<TAB>inchi`` file into a dict.
+
+    Each line is split at its first tab; lines where either part is empty
+    (including lines without a tab) are ignored. Both parts are stripped of
+    surrounding whitespace. Returns ``{}`` when ``path`` is not an existing file.
+    """
+    source = Path(path)
+    mapping: dict[str, str] = {}
+    if source.is_file():
+        with open(source, encoding="utf-8") as stream:
+            for record in stream:
+                key, _sep, value = record.rstrip("\n").partition("\t")
+                if key and value:
+                    mapping[key.strip()] = value.strip()
+    return mapping
+
+
+# --------------------------------------------------------------------------- #
+# KOs and organism genes
+# --------------------------------------------------------------------------- #
+@dataclass
+class KeggKO:
+    """A KEGG Orthology entry: its name and the organism genes assigned to it."""
+
+    # Entry id taken from the start of the ENTRY field (e.g. ``K01194``).
+    id: str
+    # First line of the entry's DEFINITION field, stripped.
+    name: str = ""
+    # Pairs parsed from the GENES block; the organism code is lower-cased.
+    genes: list[tuple[str, str]] = field(default_factory=list)  # (organism, gene)
+
+
+def parse_kegg_kos(kegg_dir: str | Path, *, keep: set[str] | None = None) -> list[KeggKO]:
+    """Read ``<kegg_dir>/ko`` into :class:`KeggKO` records (name + organism genes).
+
+    When ``keep`` is given, only entries whose id is in it are parsed (e.g. just
+    the KOs linked to reactions), mirroring RAVEN's ``koList`` argument. The gene
+    lists are huge, so restricting is the usual call. Entries without an id are
+    always skipped.
+    """
+    records: list[KeggKO] = []
+    for entry in _iter_entries(Path(kegg_dir) / "ko"):
+        ko_id = _first_id(entry.get("ENTRY", []))
+        wanted = bool(ko_id) and (keep is None or ko_id in keep)
+        if not wanted:
+            continue
+        definition = entry.get("DEFINITION")
+        records.append(
+            KeggKO(
+                id=ko_id,
+                name=definition[0].strip() if definition else "",
+                genes=list(_parse_gene_lines(entry.get("GENES", []))),
+            )
+        )
+    return records
+
+
+def _parse_gene_lines(lines: list[str]) -> Iterator[tuple[str, str]]:
+    """Iterate over the ``(organism, gene)`` pairs of a KO entry's GENES block.
+
+    Each line has the form ``BSU: BSU31050(gbsB) BSU31060``: an organism code,
+    a colon, then whitespace-separated gene ids. Anything from the first ``(``
+    of a gene token onwards (the optional gene name) is discarded, and tokens
+    that are empty after that are skipped. The organism code is lower-cased to
+    match KEGG's protein sequence files (as RAVEN does). Lines without a colon
+    are ignored.
+    """
+    for line in lines:
+        if ":" not in line:
+            continue
+        code, _, gene_field = line.partition(":")
+        organism = code.strip().lower()
+        for token in gene_field.split():
+            gene, _, _ = token.partition("(")
+            if gene:
+                yield organism, gene
+
+
+# --------------------------------------------------------------------------- #
+# Reference model + tables
+# --------------------------------------------------------------------------- #
+_COMPARTMENT = "s"  # single 'system' compartment, as in getModelFromKEGG
+
+
+def build_reference_model(
+    reactions: list[KeggReaction], compounds: list[KeggCompound]
+) -> cobra.Model:
+    """Build the gene-free KEGG reference ``cobra.Model`` from parsed records.
+
+    Metabolites are created only for ids that occur in some reaction's
+    stoichiometry, all in the single ``_COMPARTMENT`` compartment; ids without a
+    matching compound record are named after their id. Reactions with an empty
+    stoichiometry are skipped. Each reaction is annotated with its KEGG id and,
+    when present, its KO ids, EC codes and pathways — but **no genes/GPRs**.
+    Bounds are ``(-1000, 1000)`` for reversible reactions and ``(0, 1000)``
+    otherwise.
+    """
+    model = cobra.Model("KEGG")
+    model.name = "Automatically generated from KEGG database"
+
+    compound_by_id = {record.id: record for record in compounds}
+
+    # Prefer the stoichiometry cached by parse_kegg_reactions; fall back to
+    # parsing the equation for records built without that cache.
+    stoich_by_rxn: dict[str, dict[str, float]] = {}
+    for rxn in reactions:
+        stoich_by_rxn[rxn.id] = rxn.stoichiometry or _parse_equation(rxn.equation)[0]
+
+    used_ids: set[str] = set()
+    for stoich in stoich_by_rxn.values():
+        used_ids.update(stoich)
+
+    met_index: dict[str, cobra.Metabolite] = {}
+    for cid in sorted(used_ids):
+        met = cobra.Metabolite(cid, compartment=_COMPARTMENT)
+        record = compound_by_id.get(cid)
+        if record is None:
+            met.name = cid
+        else:
+            met.name = record.name or cid
+            met.formula = record.formula or None
+            if record.chebi:
+                met.annotation["chebi"] = record.chebi
+            if record.pubchem:
+                met.annotation["pubchem.substance"] = record.pubchem
+            if record.inchi:
+                met.annotation["inchi"] = record.inchi
+        met_index[cid] = met
+    model.add_metabolites(list(met_index.values()))
+
+    built = []
+    for rxn in reactions:
+        stoich = stoich_by_rxn[rxn.id]
+        if not stoich:  # empty (e.g. A <=> A) -> skip, as RAVEN drops bad rxns
+            continue
+        reaction = cobra.Reaction(rxn.id, name=rxn.name)
+        if rxn.reversible:
+            reaction.bounds = (-1000.0, 1000.0)
+        else:
+            reaction.bounds = (0.0, 1000.0)
+        reaction.add_metabolites({met_index[met_id]: coef for met_id, coef in stoich.items()})
+        reaction.annotation["kegg.reaction"] = rxn.id
+        optional_annotations = (
+            ("kegg.orthology", rxn.kos),
+            ("ec-code", rxn.eccodes),
+            ("kegg.pathway", rxn.pathways),
+        )
+        for key, values in optional_annotations:
+            if values:
+                reaction.annotation[key] = values
+        built.append(reaction)
+    model.add_reactions(built)
+    return model
+
+
+def build_kegg_tables(
+    reactions: list[KeggReaction], kos: list[KeggKO]
+) -> dict[str, pd.DataFrame]:
+    """Derive the minimal relational tables from parsed reactions and KOs.
+
+    Returns a dict with, in this order:
+
+    * ``ko_reaction`` — unique ``(ko, reaction)`` pairs;
+    * ``ko_names`` — one ``(ko, name)`` row per KO record;
+    * ``organism_gene_ko`` — unique ``(organism, gene, ko)`` triples;
+    * ``rxn_flags`` — one row per reaction with its ``spontaneous``,
+      ``undefined_stoich``, ``incomplete`` and ``general`` flags.
+    """
+    ko_reaction_rows = []
+    flag_rows = []
+    for rxn in reactions:
+        ko_reaction_rows.extend((ko, rxn.id) for ko in rxn.kos)
+        flag_rows.append(
+            (rxn.id, rxn.spontaneous, rxn.undefined_stoich, rxn.incomplete, rxn.general)
+        )
+
+    name_rows = []
+    gene_rows = []
+    for ko in kos:
+        name_rows.append((ko.id, ko.name))
+        gene_rows.extend((org, gene, ko.id) for org, gene in ko.genes)
+
+    tables: dict[str, pd.DataFrame] = {}
+    tables["ko_reaction"] = pd.DataFrame(
+        ko_reaction_rows, columns=["ko", "reaction"]
+    ).drop_duplicates(ignore_index=True)
+    tables["ko_names"] = pd.DataFrame(name_rows, columns=["ko", "name"])
+    tables["organism_gene_ko"] = pd.DataFrame(
+        gene_rows, columns=["organism", "gene", "ko"]
+    ).drop_duplicates(ignore_index=True)
+    tables["rxn_flags"] = pd.DataFrame(
+        flag_rows,
+        columns=["reaction", "spontaneous", "undefined_stoich", "incomplete", "general"],
+    )
+    return tables
+
+
+def write_kegg_tables(tables: dict[str, pd.DataFrame], out_dir: str | Path) -> list[Path]:
+    """Save every table in ``tables`` as ``<out_dir>/<name>.tsv.gz``.
+
+    ``out_dir`` is created if needed. Each frame is written tab-separated,
+    without its index, through a UTF-8 gzip text stream. Gzipped TSV is the
+    dependency-free cross-language format shared with MATLAB RAVEN (see
+    docs/kegg_data_format.md).
+
+    Returns the written paths in the iteration order of ``tables``.
+    """
+    target = Path(out_dir)
+    target.mkdir(parents=True, exist_ok=True)
+    paths: list[Path] = []
+    for name in tables:
+        destination = target / f"{name}.tsv.gz"
+        with gzip.open(destination, "wt", encoding="utf-8", newline="") as sink:
+            tables[name].to_csv(sink, sep="\t", index=False)
+        paths.append(destination)
+    return paths
+
+
+def read_kegg_table(path: str | Path) -> pd.DataFrame:
+    """Load a tab-separated KEGG table as an all-string ``DataFrame``.
+
+    Intended for files written by :func:`write_kegg_tables` or
+    :func:`stream_organism_gene_ko`. No compression is specified, so pandas
+    infers it from the file suffix; both the gzipped small tables
+    (``.tsv.gz``) and the xz-compressed ``organism_gene_ko.tsv.xz`` are read
+    transparently.
+
+    Every column is read as ``str`` and empty cells stay empty strings (no NaN
+    conversion), so boolean columns such as those of ``rxn_flags`` come back as
+    the text ``"True"``/``"False"``.
+    """
+    options = {"sep": "\t", "dtype": str, "keep_default_na": False}
+    return pd.read_csv(path, **options)
+
+
+def _flush_sorted_run(rows: list[str], tmp_dir: Path, run_no: int) -> Path:
+    """Sort ``organism\\tgene\\tko\\n`` lines in place and spool them to one gzipped run.
+
+    ``rows`` is sorted with :func:`_ogk_sort_key` (the caller's list is
+    modified) and written verbatim to ``tmp_dir/run_<run_no>.gz``, with the run
+    number zero-padded to four digits. Returns the path of the run file.
+    """
+    rows.sort(key=_ogk_sort_key)
+    destination = tmp_dir / f"run_{run_no:04d}.gz"
+    with gzip.open(destination, "wt", encoding="utf-8", newline="") as sink:
+        for row in rows:
+            sink.write(row)
+    return destination
+
+
+def _ogk_sort_key(line: str) -> tuple[str, str]:
+    """Return the ``(organism, gene)`` sort key of an ``organism\\tgene\\tko`` line.
+
+    The line is split at its first two tabs; a line with fewer than three
+    tab-separated fields raises ``ValueError``.
+    """
+    columns = line.split("\t", 2)
+    organism, gene, _ko = columns
+    return (organism, gene)
+
+
+def stream_organism_gene_ko(
+    kegg_dir: str | Path, keep: set[str], ogk_path: str | Path, *, chunk_rows: int = 1_000_000
+) -> pd.DataFrame:
+    """Stream the ``ko`` file to a sorted, xz-compressed ``organism_gene_ko.tsv.xz``.
+
+    Real KEGG has ~9M gene↔KO associations — far too many to hold in memory as a
+    DataFrame. Rows are sorted by ``(organism, gene)`` before writing: gene IDs
+    from one organism share long common prefixes (locus tags, numeric runs), so
+    sorting makes them adjacent and lets the compressor shrink the table ~2.9x
+    versus the unsorted gzip form. The order also matches the by-organism query
+    pattern in :func:`get_kegg_model_for_organism`.
+
+    The sort is an **external merge sort**: rows are buffered until at least
+    ``chunk_rows`` have accumulated (checked after each KO entry, so a buffer can
+    overshoot by one entry's genes), each buffer is sorted and spooled to a
+    gzipped temp file, and the runs are merged with :func:`heapq.merge`. Only
+    the small ``ko_names`` table (one row per KO) is held in full and returned.
+
+    Parameters
+    ----------
+    kegg_dir
+        Directory containing the KEGG ``ko`` flat file.
+    keep
+        KO ids to include; every other entry is skipped.
+    ogk_path
+        Output path. Its parent directory is created if missing, because the
+        temporary run files are spooled next to the output.
+    chunk_rows
+        Buffer threshold (in rows) that triggers spooling a sorted run.
+
+    Returns
+    -------
+    pandas.DataFrame
+        The ``ko_names`` table with columns ``ko`` and ``name``.
+    """
+    from contextlib import ExitStack
+
+    ogk_path = Path(ogk_path)
+    # TemporaryDirectory(dir=...) fails if the directory does not exist yet.
+    ogk_path.parent.mkdir(parents=True, exist_ok=True)
+    names: list[tuple[str, str]] = []
+    buffer: list[str] = []
+    runs: list[Path] = []
+
+    with tempfile.TemporaryDirectory(prefix="ogk_sort_", dir=ogk_path.parent) as tmp:
+        tmp_dir = Path(tmp)
+        for entry in _iter_entries(Path(kegg_dir) / "ko"):
+            ko_id = _first_id(entry.get("ENTRY", []))
+            if not ko_id or ko_id not in keep:
+                continue
+            names.append((ko_id, entry["DEFINITION"][0].strip() if entry.get("DEFINITION") else ""))
+            for organism, gene in _parse_gene_lines(entry.get("GENES", [])):
+                buffer.append(f"{organism}\t{gene}\t{ko_id}\n")
+            if len(buffer) >= chunk_rows:
+                runs.append(_flush_sorted_run(buffer, tmp_dir, len(runs)))
+                buffer = []
+        if buffer:
+            runs.append(_flush_sorted_run(buffer, tmp_dir, len(runs)))
+
+        # ExitStack closes every run handle that was opened, even if opening a
+        # later run or writing the output fails part-way, and does so before
+        # the temporary directory is removed.
+        with ExitStack() as stack:
+            handles = [
+                stack.enter_context(gzip.open(run, "rt", encoding="utf-8")) for run in runs
+            ]
+            with lzma.open(ogk_path, "wt", encoding="utf-8", newline="") as out:
+                out.write("organism\tgene\tko\n")
+                out.writelines(heapq.merge(*handles, key=_ogk_sort_key))
+    return pd.DataFrame(names, columns=["ko", "name"])
+
+
+def parse_kegg_dump(kegg_dir: str | Path, out_dir: str | Path) -> dict[str, Path]:
+    """Parse a full KEGG dump into the reference model + tables and write them out.
+
+    Written into ``out_dir`` (created if needed):
+
+    * ``ko_reaction.tsv.gz`` and ``rxn_flags.tsv.gz`` — built in memory;
+    * ``organism_gene_ko.tsv.xz`` — streamed to disk by
+      :func:`stream_organism_gene_ko` rather than built in memory, so this
+      scales to the full KEGG database; restricted to KOs linked to a reaction;
+    * ``ko_names.tsv.gz`` — the KO names returned by that streaming pass;
+    * ``reference_model.yml.gz`` — the gene-free reference model.
+
+    Returns ``{name: path}`` for everything written, with keys
+    ``ko_reaction``, ``rxn_flags``, ``organism_gene_ko``, ``ko_names`` and
+    ``reference_model``.
+    """
+    out_dir = Path(out_dir)
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    reactions = parse_kegg_reactions(kegg_dir)
+    compounds = parse_kegg_compounds(kegg_dir)
+    linked_kos = {ko for r in reactions for ko in r.kos}
+
+    model = build_reference_model(reactions, compounds)
+
+    # Reuse build_kegg_tables for the reaction-derived tables so their schema
+    # is defined in one place; the KO-derived tables are streamed below, hence
+    # the empty KO list here.
+    reaction_tables = build_kegg_tables(reactions, [])
+    small = {name: reaction_tables[name] for name in ("ko_reaction", "rxn_flags")}
+    paths = dict(zip(small, write_kegg_tables(small, out_dir), strict=True))
+
+    ogk_path = out_dir / "organism_gene_ko.tsv.xz"
+    ko_names = stream_organism_gene_ko(kegg_dir, linked_kos, ogk_path)
+    paths["organism_gene_ko"] = ogk_path
+    (ko_names_path,) = write_kegg_tables({"ko_names": ko_names}, out_dir)
+    paths["ko_names"] = ko_names_path
+
+    ref_path = out_dir / "reference_model.yml.gz"
+    write_yaml_model(model, ref_path)
+    paths["reference_model"] = ref_path
+    return paths
diff --git a/src/raven_python/reconstruction/kegg/query.py b/src/raven_python/reconstruction/kegg/query.py
new file mode 100644
index 0000000..2df3f78
--- /dev/null
+++ b/src/raven_python/reconstruction/kegg/query.py
@@ -0,0 +1,231 @@
+"""De-novo KEGG draft from a proteome FASTA via HMM search (step 3b.5).
+
+Ports the FASTA/HMM branch of RAVEN ``getKEGGModelForOrganism``: search a query
+proteome against the KO profile-HMM library (3b.3), assign genes to KOs using the
+score cut-off and the two score-ratio filters, then build the draft model with the
+shared assembler. For organisms not in KEGG.
+
+Improvement over RAVEN: one ``hmmscan`` against the single ``hmmpress``-ed library
+(K7) replaces RAVEN's per-KO ``hmmsearch`` loop. Phylogenetic-distance subsampling
+is **not** used — our prebuilt prok90/euk90 libraries already fix the sequence set,
+so picking the right domain library (not per-organism distance weighting) is the
+relevant choice.
+
+The scoring/assignment logic (:func:`assign_kos`, :func:`parse_hmmscan_tblout`) is
+pure and unit-tested; running the search needs HMMER (``hmmscan``).
+"""
+from __future__ import annotations
+
+import math
+import subprocess
+import tempfile
+from pathlib import Path
+
+import cobra
+import pandas as pd
+
+from raven_python.binaries import resolve_binary
+from raven_python.io.yaml import read_yaml_model
+from raven_python.reconstruction.kegg.assemble import assemble_model_from_ko_genes
+from raven_python.reconstruction.kegg.parse import read_kegg_table
+
+_NOTE = "Included by get_kegg_model_from_sequences (using HMMs)"  # ``note=`` for the assembler
+_MIN_EVALUE = 1e-250  # floor for a reported E-value of 0, to keep logs finite
+
+
+def run_hmmscan(
+    fasta: str | Path,
+    library: str | Path,
+    *,
+    threads: int = 1,
+    hmmscan: str | Path | None = None,
+) -> str:
+    """Search ``fasta`` against the pressed ``library`` and return the ``--tblout`` text."""
+    binary = resolve_binary("hmmscan", binary=hmmscan)
+    with tempfile.TemporaryDirectory() as workdir:
+        table = Path(workdir) / "hits.tbl"
+        argv = [binary, "--cpu", str(threads), "--tblout", str(table), str(library), str(fasta)]
+        finished = subprocess.run(argv, capture_output=True, text=True)
+        if finished.returncode == 0:
+            return table.read_text()
+        raise RuntimeError(f"hmmscan failed:\n{(finished.stderr or '').strip()}")
+
+
+def parse_hmmscan_tblout(text: str) -> pd.DataFrame:
+    """Turn ``hmmscan --tblout`` output into a ``[ko, gene, evalue]`` DataFrame.
+
+    Blank lines, ``#`` comment lines and rows with fewer than five
+    whitespace-separated fields are ignored. ``hmmscan`` reports the HMM as the
+    *target*, so field 1 (target name) is the KO, field 3 (query name) is the
+    proteome gene and field 5 is the full-sequence E-value.
+    """
+    records = [
+        (cols[0], cols[2], float(cols[4]))
+        for cols in (
+            row.split() for row in text.splitlines() if row and not row.startswith("#")
+        )
+        if len(cols) >= 5
+    ]
+    return pd.DataFrame(records, columns=["ko", "gene", "evalue"])
+
+
+def assign_kos(
+    hits: pd.DataFrame,
+    *,
+    cutoff: float = 1e-30,
+    min_score_ratio_ko: float = 0.3,
+    min_score_ratio_g: float = 0.9,
+) -> dict[str, list[str]]:
+    """Assign genes to KOs from HMM hits, applying the cut-off and ratio filters.
+
+    Ports RAVEN's three steps on the KO×gene E-value matrix:
+
+    1. keep hits with ``evalue <= cutoff`` (a NaN E-value never qualifies);
+    2. **min_score_ratio_ko** — within a KO, drop genes whose
+       ``log(evalue)/log(best_evalue_in_KO) < min_score_ratio_ko`` (prune weak
+       members of a KO);
+    3. **min_score_ratio_g** — within a gene, drop KOs whose
+       ``log(evalue)/log(best_evalue_for_gene) < min_score_ratio_g`` (stop a gene
+       that clearly belongs to one KO leaking into weaker ones).
+
+    Smaller E-value = better; all kept values are ``< 1`` (``cutoff >= 1`` raises
+    ``ValueError``), so their logs are negative: the best hit gives ratio 1, weaker
+    hits a smaller positive ratio. Returns ``{ko: sorted gene ids}``, empty KOs omitted.
+
+    Default calibration (see IMPROVEMENTS K15). Cross-validated against the true
+    KEGG gene→KO annotation of four organisms spanning the prok/euk libraries and
+    the well-/lesser-studied axis (*S. cerevisiae*, *Cyanidioschyzon merolae*,
+    *E. coli*, *Mycoplasma genitalium*): real annotations score
+    overwhelmingly (median E ≈ 1e-100…1e-155) while spurious hits pile up at
+    ≈1e-8, so the two are separated by ~20 orders of magnitude. RAVEN's
+    ``1e-50`` sits inside the *true* tail and silently drops real but divergent
+    hits — costing 16% gene→KO recall on the divergent minimal genome
+    (*M. genitalium*) for no noise-rejection benefit (noise is far weaker). The
+    default is therefore loosened to **1e-30** (recovers that tail; still ~22
+    orders above the noise floor), with the precision work moved to
+    **min_score_ratio_g = 0.9** — the *effective* precision lever (it resolves
+    multi-KO genes). ``min_score_ratio_ko`` proved empirically inert across all
+    four organisms (identical output at 0.0/0.3/0.5) and is kept only for RAVEN
+    parity.
+    """
+    # The ratio filters compare log(evalue)/log(best_evalue); when best == 1.0
+    # the denominator is 0 → ZeroDivisionError. The default cutoff (1e-30) keeps
+    # us safely away, but a caller-passed cutoff ≥ 1 is ambiguous and would
+    # crash later. Reject it up front with a clear message.
+    if cutoff >= 1:
+        raise ValueError(
+            f"cutoff must be < 1 (smaller E-value = better hit); got {cutoff!r}."
+        )
+
+    # Best (smallest) E-value per (ko, gene), filtered at the cut-off.
+    mat: dict[str, dict[str, float]] = {}
+    for ko, gene, evalue in zip(hits["ko"], hits["gene"], hits["evalue"], strict=True):
+        if not evalue <= cutoff:  # also rejects NaN, which `> cutoff` let through as a top hit
+            continue
+        e = evalue if evalue > 0 else _MIN_EVALUE
+        per_ko = mat.setdefault(ko, {})
+        if gene not in per_ko or e < per_ko[gene]:
+            per_ko[gene] = e
+
+    # Step 2: prune weak genes within each KO.
+    for ko, genes in mat.items():
+        log_best = math.log(min(genes.values()))
+        mat[ko] = {
+            g: e for g, e in genes.items() if math.log(e) / log_best >= min_score_ratio_ko
+        }
+
+    # Step 3: prune weak KOs within each gene (over the survivors of step 2).
+    gene_kos: dict[str, dict[str, float]] = {}
+    for ko, genes in mat.items():
+        for g, e in genes.items():
+            gene_kos.setdefault(g, {})[ko] = e
+    dropped: set[tuple[str, str]] = set()
+    for g, kos in gene_kos.items():
+        log_best = math.log(min(kos.values()))
+        for ko, e in kos.items():
+            if math.log(e) / log_best < min_score_ratio_g:
+                dropped.add((ko, g))
+
+    result: dict[str, list[str]] = {}
+    for ko, genes in mat.items():
+        kept = sorted(g for g in genes if (ko, g) not in dropped)
+        if kept:
+            result[ko] = kept
+    return result
+
+
+def get_kegg_model_from_sequences(
+    fasta: str | Path,
+    reference_model: cobra.Model,
+    ko_reaction: pd.DataFrame,
+    library: str | Path,
+    *,
+    rxn_flags: pd.DataFrame | None = None,
+    model_id: str | None = None,
+    cutoff: float = 1e-30,
+    min_score_ratio_ko: float = 0.3,
+    min_score_ratio_g: float = 0.9,
+    keep_spontaneous: bool = True,
+    keep_undefined_stoich: bool = True,
+    keep_incomplete: bool = True,
+    keep_general: bool = False,
+    threads: int = 1,
+    hmmscan: str | Path | None = None,
+) -> cobra.Model:
+    """Build a draft model for the proteome in ``fasta`` from KO profile-HMM hits.
+
+    Chains :func:`run_hmmscan` (against the pressed ``library``),
+    :func:`parse_hmmscan_tblout`, :func:`assign_kos` and
+    :func:`assemble_model_from_ko_genes`; only the assembled model (the first
+    element of the assembler's result) is returned.
+    """
+    tblout = run_hmmscan(fasta, library, threads=threads, hmmscan=hmmscan)
+    assignment = assign_kos(
+        parse_hmmscan_tblout(tblout),
+        cutoff=cutoff,
+        min_score_ratio_ko=min_score_ratio_ko,
+        min_score_ratio_g=min_score_ratio_g,
+    )
+    # The reaction-class switches are forwarded to the assembler untouched.
+    switches = {
+        "keep_spontaneous": keep_spontaneous,
+        "keep_undefined_stoich": keep_undefined_stoich,
+        "keep_incomplete": keep_incomplete,
+        "keep_general": keep_general,
+    }
+    draft, _ = assemble_model_from_ko_genes(
+        reference_model, ko_reaction, assignment,
+        rxn_flags=rxn_flags, model_id=model_id, note=_NOTE, **switches,
+    )
+    return draft
+
+
+def get_kegg_model_from_sequences_with_artefacts(
+    fasta: str | Path,
+    artefact_dir: str | Path | None = None,
+    library: str | Path | None = None,
+    *,
+    domain: str = "prokaryotes",
+    version: str | None = None,
+    **kwargs,
+) -> cobra.Model:
+    """Run the HMM query with the reference model and tables stored in ``artefact_dir``.
+
+    A ``None`` ``artefact_dir`` / ``library`` is resolved through
+    :func:`raven_python.data.ensure_kegg_data` / :func:`raven_python.data.ensure_kegg_hmm_library`
+    (``domain`` goes to the library lookup, ``version`` to both); ``kwargs`` are forwarded.
+    """
+    if artefact_dir is None or library is None:
+        from raven_python.data import ensure_kegg_data, ensure_kegg_hmm_library
+
+        if artefact_dir is None:
+            artefact_dir = ensure_kegg_data(version=version)
+        if library is None:
+            library = ensure_kegg_hmm_library(domain, version=version)
+    root = Path(artefact_dir)
+    reference = read_yaml_model(root / "reference_model.yml.gz")
+    tables = [read_kegg_table(root / name) for name in ("ko_reaction.tsv.gz", "rxn_flags.tsv.gz")]
+    ko_reaction, rxn_flags = tables
+    return get_kegg_model_from_sequences(
+        fasta, reference, ko_reaction, library, rxn_flags=rxn_flags, **kwargs
+    )
diff --git a/src/raven_python/reconstruction/kegg/taxonomy.py b/src/raven_python/reconstruction/kegg/taxonomy.py
new file mode 100644
index 0000000..463fcce
--- /dev/null
+++ b/src/raven_python/reconstruction/kegg/taxonomy.py
@@ -0,0 +1,71 @@
+"""Parse the KEGG ``taxonomy`` file into per-organism category lineages.
+
+Ports the file-reading half of RAVEN ``getPhylDist`` (the distance-matrix half is
+step 3b.5). The ``taxonomy`` file is an indented tree: ``#``-prefixed lines name a
+category, the number of leading ``#`` giving its depth; organism lines are
+tab-separated ``T-number<tab>org_id<tab>name<tab>...``. Each organism inherits the
+stack of categories above it, the first of which is its domain (``Prokaryotes`` /
+``Eukaryotes``).
+
+Used by 3b.3 to split genes into the prok/euk HMM libraries, and (later) by 3b.5
+for phylogenetic distances.
+"""
+from __future__ import annotations
+
+import warnings
+from pathlib import Path
+
+
+def parse_taxonomy(path: str | Path) -> dict[str, list[str]]:
+    """Read a KEGG ``taxonomy`` file into ``{organism_id: [category, ...]}``.
+
+    Lineages run from the outermost category to the innermost. A heading that
+    skips levels gets ``""`` placeholders for the gap (one warning per call).
+    """
+    lineages: dict[str, list[str]] = {}
+    current: list[str] = []
+    warned = False
+    with open(path, encoding="utf-8") as handle:
+        for line_no, raw in enumerate(handle, start=1):
+            line = raw.rstrip("\n")
+            if not line.strip():
+                continue
+            if not line.startswith("#"):
+                # Organism row: the second field is the organism id.
+                fields = line.split("\t") if "\t" in line else line.split()
+                if len(fields) >= 2:
+                    lineages[fields[1].strip()] = list(current)
+                continue
+            tail = line.lstrip("#")
+            depth = len(line) - len(tail)
+            name = tail.strip()
+            gap = depth - 1 - len(current)  # > 0 when the heading skips levels
+            if gap > 0 and not warned:
+                warned = True
+                warnings.warn(
+                    f"{path}: taxonomy depth skips a level near line {line_no} "
+                    f"({'#' * depth} {name!r} appeared with stack {current!r}); "
+                    "padding the missing levels with '' (later occurrences silenced).",
+                    stacklevel=2,
+                )
+            current = current + [""] * gap if gap > 0 else current[: depth - 1]
+            current.append(name)
+    return lineages
+
+
+def organism_domains(path: str | Path) -> dict[str, str]:
+    """Map each organism id to its domain, i.e. the first category of its lineage."""
+    return {org: lineage[0] for org, lineage in parse_taxonomy(path).items() if lineage}
+
+
+def organisms_in_domain(path: str | Path, domain: str) -> set[str]:
+    """Organism ids whose outermost category matches ``domain`` (case-insensitive).
+
+    Either string may be a prefix of the other, so ``"prok"`` matches ``"Prokaryotes"``.
+    Blank categories (``""`` padding from :func:`parse_taxonomy`) are skipped: ``""`` prefixes all.
+    """
+    needle = domain.lower()
+    domains = {org: dom.lower() for org, dom in organism_domains(path).items() if dom}
+    return {
+        org for org, dom in domains.items() if dom.startswith(needle) or needle.startswith(dom)
+    }
diff --git a/src/raven_python/tasks/__init__.py b/src/raven_python/tasks/__init__.py
new file mode 100644
index 0000000..d232c16
--- /dev/null
+++ b/src/raven_python/tasks/__init__.py
@@ -0,0 +1,23 @@
+"""Metabolic task definition, parsing, and checking.
+
+* :class:`Task` + :func:`parse_task_list` — the task-list file format.
+* :func:`check_tasks` + :class:`TaskResult` — run tasks against a model.
+* :func:`find_task_essential_reactions` + :class:`EssentialReactionsResult` — reactions
+  a model must use to satisfy a task list (the input for (f)tINIT's task layer).
+"""
+from raven_python.tasks.check import (
+    EssentialReactionsResult,
+    TaskResult,
+    check_tasks,
+    find_task_essential_reactions,
+)
+from raven_python.tasks.tasklist import Task, parse_task_list
+
+__all__ = [  # names re-exported from ``tasks.check`` and ``tasks.tasklist``
+    "EssentialReactionsResult",
+    "Task",
+    "TaskResult",
+    "check_tasks",
+    "find_task_essential_reactions",
+    "parse_task_list",
+]
diff --git a/src/raven_python/tasks/check.py b/src/raven_python/tasks/check.py
new file mode 100644
index 0000000..817bae5
--- /dev/null
+++ b/src/raven_python/tasks/check.py
@@ -0,0 +1,332 @@
+"""Check whether a model performs a set of metabolic tasks.
+
+For each task the model is constrained by the task's allowed inputs/outputs (and any
+extra reactions / bound changes), then tested for feasibility: a task *passes* if a
+steady-state flux exists, unless it is marked ``should_fail`` (then it passes iff
+infeasible).
+
+Inputs/outputs are encoded as ranges on the per-metabolite mass-balance constraint
+(``model.constraints[met.id]``): an input allows net consumption (``Sv ∈ [-UB, -LB]``)
+and an output allows / requires net production (``Sv ≤ UB``, and ``≥ LB`` if
+``LB > 0``). Existing boundary reactions are closed first, so inputs/outputs are
+defined solely by the task (closed-model semantics).
+"""
+from __future__ import annotations
+
+import pickle
+from collections.abc import Iterable
+from dataclasses import dataclass
+from pathlib import Path
+
+import cobra
+from cobra.exceptions import OptimizationError
+from cobra.flux_analysis import flux_variability_analysis, pfba
+from optlang.symbolics import Zero
+
+from raven_python.manipulation.add import add_reactions_from_equations
+from raven_python.tasks.tasklist import Task, parse_task_list
+
+_ALLMETS = "ALLMETS"  # bulk token: every metabolite of the model
+_ALLMETSIN = "ALLMETSIN"  # bulk-token prefix: ``ALLMETSIN[COMP]`` = every metabolite in COMP
+
+
+@dataclass
+class TaskResult:
+    """Outcome of checking one task; ``passed`` already accounts for ``should_fail``."""
+
+    id: str  # task id, copied from the Task
+    description: str  # task description, copied from the Task
+    passed: bool  # verdict: ``feasible != task.should_fail``; False on a set-up error
+    feasible: bool  # True when the constrained model solved with status "optimal"
+    error: str | None = None  # set-up problem (unknown metabolite / CHANGED RXN), else None
+
+
+def _set_constraint_bounds(constraint, lb: float, ub: float) -> None:
+    """Move ``constraint`` to ``[lb, ub]``, ordering the two writes to avoid lb > ub."""
+    raise_ub_first = lb > constraint.ub
+    if raise_ub_first:
+        # The new lb lies above the current ub, so widen the ub before moving lb.
+        constraint.ub, constraint.lb = ub, lb
+    else:
+        constraint.lb, constraint.ub = lb, ub
+
+
+def _classify(token: str) -> tuple[str, str | None]:
+    """Classify a task token: ``("all", None)``, ``("comp", COMP)`` or ``("met", TOKEN)``."""
+    upper = token.upper()
+    if upper == _ALLMETS:
+        return "all", None
+    prefix = _ALLMETSIN + "["  # anything else, incl. malformed ``ALLMETSIN[...``, is a metabolite
+    is_comp = upper.startswith(prefix) and upper.endswith("]")
+    return ("comp", upper[len(prefix): -1]) if is_comp else ("met", upper)
+
+
+def _metabolite_bounds(
+    task: Task, name_to_ids: dict[str, list[str]], comp_to_ids: dict[str, list[str]]
+) -> tuple[dict[str, list[float]], list[str]]:
+    """Translate a task's inputs/outputs into ``{met_id: [lb, ub]}`` (RAVEN ``b``).
+
+    Within the inputs, and again within the outputs, bulk tokens (ALLMETS /
+    ALLMETSIN) are applied before specific metabolites, as RAVEN does, so a
+    specific entry overrides a bulk one. The second return value lists the
+    metabolite tokens that matched nothing in ``name_to_ids`` (→ task error).
+    """
+    bounds: dict[str, list[float]] = {}
+    unresolved: list[str] = []
+    every_met = [mid for group in comp_to_ids.values() for mid in group]
+
+    for entries, is_input in ((task.inputs, True), (task.outputs, False)):
+        classified = [(_classify(token), token, lb, ub) for token, lb, ub in entries]
+        # Stable sort on "is a specific metabolite": bulk tokens move to the front.
+        classified.sort(key=lambda item: item[0][0] == "met")
+        for (kind, arg), token, lb, ub in classified:
+            if kind == "all":
+                targets = every_met
+            elif kind == "comp":
+                targets = comp_to_ids.get(arg, [])
+            else:
+                targets = name_to_ids.get(arg, [])
+                if not targets:
+                    unresolved.append(token)
+                    continue
+            for mid in targets:
+                pair = bounds.setdefault(mid, [0.0, 0.0])
+                if is_input:
+                    pair[0] = -ub  # allow net consumption up to UB (RAVEN b1 = -UBin)
+                    if kind == "met":
+                        pair[1] = -lb
+                else:
+                    pair[1] = ub  # allow net production up to UB
+                    if kind == "met" and lb > 0:
+                        pair[0] = lb  # require at least LB produced
+    return bounds, unresolved
+
+
+def task_name_maps(model: cobra.Model) -> tuple[dict[str, list[str]], dict[str, list[str]]]:
+    """Index a model's metabolite ids by upper-cased ``NAME[COMP]`` and by compartment.
+
+    Values are lists: metabolites sharing a name and compartment are all kept.
+    """
+    by_name: dict[str, list[str]] = {}
+    by_comp: dict[str, list[str]] = {}
+    for met in model.metabolites:
+        name_key = f"{met.name}[{met.compartment}]".upper()
+        comp_key = (met.compartment or "").upper()
+        by_name.setdefault(name_key, []).append(met.id)
+        by_comp.setdefault(comp_key, []).append(met.id)
+    return by_name, by_comp
+
+
+def apply_task_constraints(
+    model: cobra.Model, task: Task, name_to_id, comp_to_ids
+) -> tuple[set[str], str | None]:
+    """Constrain ``model`` in place so that it encodes ``task``.
+
+    Applies the metabolite bounds from the task's inputs/outputs, adds its equations
+    as temporary ``TASK_TMP_<i>`` reactions, applies its reaction-bound changes and
+    installs a zero (feasibility-only) objective. Returns ``(task_metabolite_ids,
+    error)``; on error the id set is empty and the model may be partially modified.
+    """
+    bounds, missing = _metabolite_bounds(task, name_to_id, comp_to_ids)
+    if missing:
+        return set(), f"unknown metabolite(s): {sorted(set(missing))}"
+    referenced = set(bounds)
+    for mid, (lb, ub) in bounds.items():
+        if lb != 0.0 or ub != 0.0:  # a [0, 0] pair is skipped, exactly as before
+            _set_constraint_bounds(model.constraints[mid], lb, ub)
+
+    if task.equations:
+        known = {met.id for met in model.metabolites}
+        specs = [
+            {"id": f"TASK_TMP_{n}", "equation": equation, "bounds": (lb, ub)}
+            for n, (equation, lb, ub) in enumerate(task.equations)
+        ]
+        add_reactions_from_equations(model, specs, mets_by="name", allow_new_mets=True)
+        for n in range(len(specs)):
+            added = model.reactions.get_by_id(f"TASK_TMP_{n}")
+            referenced.update(met.id for met in added.metabolites if met.id in known)
+    for rxn_id, lb, ub in task.changed:
+        if rxn_id not in model.reactions:
+            return set(), f"CHANGED RXN not in model: {rxn_id!r}"
+        model.reactions.get_by_id(rxn_id).bounds = (lb, ub)
+
+    model.objective = model.problem.Objective(Zero, direction="max")  # feasibility only
+    return referenced, None
+
+
+def _build_task_model(
+    base: cobra.Model, task: Task, name_to_id, comp_to_ids
+) -> tuple[cobra.Model | None, set[str], str | None]:
+    """Constrain a copy of ``base`` for ``task``; the model is ``None`` when ``error`` is set."""
+    candidate = base.copy()
+    task_mets, error = apply_task_constraints(candidate, task, name_to_id, comp_to_ids)
+    return (candidate if not error else None), task_mets, error
+
+
+def _run_task(base: cobra.Model, task: Task, name_to_id, comp_to_ids) -> TaskResult:
+    """Check one task directly on ``base`` and leave ``base`` as it was found.
+
+    Copying a genome-scale model per task dominates ``check_tasks`` runtime, so the
+    shared model is edited in place. ``with base:`` rolls back what
+    ``apply_task_constraints`` does through cobra's API (temporary reactions and
+    metabolites, reaction bounds, objective); the untracked mass-balance bound edits
+    (``model.constraints[mid]``) are snapshotted first and restored in ``finally``.
+    """
+    bounds, missing = _metabolite_bounds(task, name_to_id, comp_to_ids)
+    if missing:
+        reason = f"unknown metabolite(s): {sorted(set(missing))}"
+        return TaskResult(task.id, task.description, False, False, reason)
+    snapshot = {mid: (base.constraints[mid].lb, base.constraints[mid].ub) for mid in bounds}
+    error, feasible = None, False
+    try:
+        with base:  # reverts temp reactions/mets, reaction bounds, objective on exit
+            _, error = apply_task_constraints(base, task, name_to_id, comp_to_ids)
+            if error is None:
+                base.slim_optimize()
+                feasible = base.solver.status == "optimal"
+    finally:  # put the untracked metabolite-constraint bounds back
+        for mid, (lb, ub) in snapshot.items():
+            _set_constraint_bounds(base.constraints[mid], lb, ub)
+    passed = error is None and feasible != task.should_fail
+    return TaskResult(task.id, task.description, passed, feasible, error)
+
+
+def check_tasks(
+    model: cobra.Model,
+    tasks: str | Iterable[Task],
+    *,
+    close_boundaries: bool = True,
+) -> list[TaskResult]:
+    """Check every task in ``tasks`` against ``model``; one :class:`TaskResult` each.
+
+    ``tasks`` is a list of :class:`Task` or a path to a task-list file. Checks run
+    on a copy of ``model``; with ``close_boundaries`` (default) that copy's boundary
+    reactions are fixed to zero flux first, so only the tasks define inputs/outputs.
+    """
+    parsed = _as_tasks(tasks)
+    shared, by_name, by_comp = _prepare_base(model, close_boundaries)
+    return [_run_task(shared, task, by_name, by_comp) for task in parsed]
+
+
+def _as_tasks(tasks: str | Iterable[Task]) -> list[Task]:
+    """Parse ``tasks`` when it is a path (str, bytes or ``__fspath__``), else list it."""
+    is_path = isinstance(tasks, (str, bytes)) or hasattr(tasks, "__fspath__")
+    return parse_task_list(tasks) if is_path else list(tasks)
+
+
+def _prepare_base(model: cobra.Model, close_boundaries: bool):
+    """Copy ``model``, optionally zero its boundary reactions, and build the name maps."""
+    working = model.copy()
+    for rxn in working.boundary if close_boundaries else ():
+        rxn.bounds = (0.0, 0.0)
+    by_name, by_comp = task_name_maps(working)
+    return working, by_name, by_comp
+
+
+@dataclass
+class EssentialReactionsResult:
+    """Reactions a model *must* use to perform a task list (RAVEN ``essentialRxns``).
+
+    ``reactions`` maps reaction id → forced flux direction (``+1`` forward, ``-1``
+    reverse): the reaction must carry flux of that sign in every feasible solution of
+    at least one task. ``per_task`` is the same, split by task id. ``task_metabolites``
+    are the model metabolites the tasks reference (RAVEN ``essentialMetsForTasks``,
+    protected from removal). ``failed_tasks`` are tasks that were infeasible or
+    malformed and thus skipped (RAVEN drops these from the task list).
+    """
+
+    reactions: dict[str, int]  # reaction id → majority direction over all tasks (tie → +1)
+    per_task: dict[str, dict[str, int]]  # task id → {reaction id: ±1} from that task's FVA
+    task_metabolites: set[str]  # metabolite ids referenced by tasks whose pFBA solve succeeded
+    failed_tasks: list[str]  # ids of tasks with a set-up error or an OptimizationError
+
+
+def _task_essential_reactions(
+    task_model: cobra.Model, candidates: list[str], tol: float
+) -> dict[str, int]:
+    """Return ``{reaction_id: ±1}`` for the ``candidates`` that FVA shows cannot be zero.
+
+    A reaction is *essential* for the task iff its FVA range excludes 0 (beyond
+    ``tol``): ``minimum > tol`` forces forward flux (``+1``), ``maximum < -tol``
+    forces reverse flux (``-1``). This matches RAVEN's "constrain to 0 →
+    infeasible" definition without a per-reaction knockout loop. FVA runs only on
+    ``candidates`` — reactions carrying flux in one minimal feasible solution; an
+    essential reaction is nonzero in every feasible solution, hence in that one —
+    which keeps the scan cheap on genome-scale templates.
+    """
+    if not candidates:
+        return {}
+    fva = flux_variability_analysis(
+        task_model, reaction_list=candidates, fraction_of_optimum=0.0
+    )
+    forced: dict[str, int] = {}
+    for rxn_id, low, high in zip(fva.index, fva["minimum"], fva["maximum"], strict=True):
+        sign = 1 if low > tol else (-1 if high < -tol else 0)
+        if sign:
+            forced[rxn_id] = sign
+    return forced
+
+
+def find_task_essential_reactions(
+    model: cobra.Model,
+    tasks: str | Iterable[Task],
+    *,
+    close_boundaries: bool = True,
+    tol: float = 1e-8,
+    cache_path: str | Path | None = None,
+) -> EssentialReactionsResult:
+    """Find the reactions a model must use to satisfy a task list.
+
+    For each task the model is constrained as in :func:`check_tasks`, then FVA
+    identifies reactions whose flux can never be zero (essential) and their forced
+    direction. This is the ``prepINITModel`` step that feeds (ft)INIT: essential
+    reactions are kept regardless of expression score and made irreversible in their
+    forced direction. When a reaction is essential in several tasks with conflicting
+    directions, the majority wins (ties → forward), matching RAVEN's ``pos < neg``.
+
+    On a genome-scale model this is slow (an FVA per task). Pass ``cache_path`` to make
+    it **resumable**: after each task the state is pickled to ``<cache_path>.part``,
+    the file is closed, and it is renamed over ``cache_path``; a re-run skips tasks
+    already cached. The cache is a pickle, so only load one you wrote yourself.
+    """
+    tasks = _as_tasks(tasks)
+    base, name_to_id, comp_to_ids = _prepare_base(model, close_boundaries)
+    original_ids = [r.id for r in base.reactions]  # model order keeps candidates deterministic
+    per_task: dict[str, dict[str, int]] = {}
+    task_metabolites: set[str] = set()
+    failed: list[str] = []
+    if cache_path is not None and Path(cache_path).exists():
+        with open(cache_path, "rb") as handle:  # NOTE: pickle — only load trusted cache files
+            cached = pickle.load(handle)
+        per_task, failed = cached["per_task"], list(cached["failed"])
+        task_metabolites = set(cached["mets"])
+    done = set(per_task) | set(failed)
+    for task in tasks:
+        if task.should_fail or task.id in done:
+            continue  # a should-fail task defines no essentials; cached ones are skipped
+        task_model, task_mets, error = _build_task_model(base, task, name_to_id, comp_to_ids)
+        if error is not None:
+            failed.append(task.id)
+        else:
+            # One min-flux solve proves feasibility and yields the candidate reactions.
+            try:
+                fluxes = pfba(task_model).fluxes
+                candidates = [rid for rid in original_ids if abs(fluxes.get(rid, 0.0)) > tol]
+                task_metabolites |= task_mets
+                per_task[task.id] = _task_essential_reactions(task_model, candidates, tol)
+            except OptimizationError:
+                failed.append(task.id)
+        if cache_path is not None:  # atomic checkpoint after each task
+            tmp = Path(f"{cache_path}.part")
+            state = {"per_task": per_task, "mets": task_metabolites, "failed": failed}
+            with open(tmp, "wb") as handle:  # closed (and flushed) before the rename below
+                pickle.dump(state, handle)
+            tmp.replace(cache_path)
+
+    # Majority direction; tie (sum == 0) → forward, as RAVEN's `pos < neg`.
+    direction_votes: dict[str, int] = {}
+    for essential in per_task.values():
+        for rxn_id, direction in essential.items():
+            direction_votes[rxn_id] = direction_votes.get(rxn_id, 0) + direction
+    reactions = {rid: (-1 if votes < 0 else 1) for rid, votes in direction_votes.items()}
+    return EssentialReactionsResult(reactions, per_task, task_metabolites, failed)
diff --git a/src/raven_python/tasks/tasklist.py b/src/raven_python/tasks/tasklist.py
new file mode 100644
index 0000000..5bdbcb0
--- /dev/null
+++ b/src/raven_python/tasks/tasklist.py
@@ -0,0 +1,141 @@
+"""Parse a metabolic task list.
+
+A task list defines, per task, allowed inputs/outputs, optional extra reactions
+(equations), reaction-bound changes, and whether the task *should fail*. Tasks
+are checked with :func:`raven_python.tasks.check_tasks`.
+
+The file is tab-delimited (``.txt``/``.tsv``) or Excel (``.xlsx``, sheet ``TASKS``;
+needs the ``[excel]`` extra). Recognised columns (the only required one is ``ID``):
+
+    ID · DESCRIPTION · IN · IN LB · IN UB · OUT · OUT LB · OUT UB ·
+    EQU · EQU LB · EQU UB · CHANGED RXN · CHANGED LB · CHANGED UB ·
+    SHOULD FAIL · PRINT FLUX · COMMENTS
+
+A task spans consecutive rows; only its first row carries an ID. Metabolites are
+written ``name[compartment]``; several in one cell are separated by ``;`` (sharing
+that row's bounds). ``IN``/``OUT`` default LB 0, UB 1000; ``EQU`` defaults LB
+-1000 if reversible (``<=>``) else 0, UB 1000. The special tokens ``ALLMETS`` and
+``ALLMETSIN[comp]`` allow free uptake/excretion of all metabolites (only the upper
+bound is used).
+"""
+from __future__ import annotations
+
+import csv
+import warnings
+from dataclasses import dataclass, field
+from pathlib import Path
+
+_COLUMNS = (
+    "ID", "DESCRIPTION", "IN", "IN LB", "IN UB", "OUT", "OUT LB", "OUT UB",
+    "EQU", "EQU LB", "EQU UB", "CHANGED RXN", "CHANGED LB", "CHANGED UB",
+    "SHOULD FAIL", "PRINT FLUX", "COMMENTS",
+)
+
+
+@dataclass
+class Task:
+    """One metabolic task. Bounds are ``(metabolite_or_reaction, lb, ub)`` triples."""
+
+    id: str  # ID column of the task's first row
+    description: str = ""  # DESCRIPTION column
+    should_fail: bool = False  # SHOULD FAIL column, read as a flag
+    print_fluxes: bool = False  # PRINT FLUX column, read as a flag
+    comments: str = ""  # COMMENTS column
+    inputs: list[tuple[str, float, float]] = field(default_factory=list)  # IN cells
+    outputs: list[tuple[str, float, float]] = field(default_factory=list)  # OUT cells
+    equations: list[tuple[str, float, float]] = field(default_factory=list)  # EQU cells
+    changed: list[tuple[str, float, float]] = field(default_factory=list)  # CHANGED RXN cells
+
+
+def _truthy(value: str) -> bool:
+    return value.strip().lower() not in ("", "0", "false", "no")
+
+
+def _num(value: str, default: float) -> float:
+    value = value.strip()
+    return float(value) if value else default
+
+
+def _read_rows(path: str | Path) -> list[list[str]]:
+    path = Path(path)
+    if path.suffix.lower() in (".xlsx", ".xlsm"):
+        try:
+            from openpyxl import load_workbook
+        except ImportError as exc:  # pragma: no cover - optional dep
+            raise ImportError("Reading .xlsx task lists needs the '[excel]' extra (openpyxl).") from exc
+        wb = load_workbook(path, data_only=True)
+        if "TASKS" not in wb.sheetnames:
+            raise ValueError(
+                f"{path}: workbook has no sheet named 'TASKS' "
+                f"(found: {wb.sheetnames}). Rename the sheet or pick that file."
+            )
+        ws = wb["TASKS"]
+        return [["" if c is None else str(c) for c in row] for row in ws.iter_rows(values_only=True)]
+    with open(path, encoding="utf-8", newline="") as handle:
+        return [row for row in csv.reader(handle, delimiter="\t")]
+
+
+def parse_task_list(path: str | Path) -> list[Task]:
+    """Parse a task-list file into :class:`Task` objects."""
+    rows = _read_rows(path)
+    header_idx = next(
+        (i for i, r in enumerate(rows) if any(c.strip().upper() == "ID" for c in r)), None
+    )
+    if header_idx is None:
+        raise ValueError(f"{path}: no header row with an 'ID' column found.")
+    header = [c.strip().upper() for c in rows[header_idx]]
+    col = {name: header.index(name) for name in _COLUMNS if name in header}
+
+    def cell(row: list[str], name: str) -> str:
+        i = col.get(name)
+        return row[i].strip() if i is not None and i < len(row) else ""
+
+    # Columns whose presence on an ID-less row signals real continuation data
+    # (vs. pure whitespace/comment), used by the orphan-row warning below.
+    _DATA_COLS = ("IN", "OUT", "EQU", "CHANGED RXN")
+
+    tasks: list[Task] = []
+    current: Task | None = None
+    for row_no, row in enumerate(rows[header_idx + 1:], start=header_idx + 2):
+        if not any(c.strip() for c in row):
+            continue
+        rid = cell(row, "ID")
+        if rid.startswith("#"):
+            continue
+        if rid:
+            current = Task(
+                id=rid,
+                description=cell(row, "DESCRIPTION"),
+                should_fail=_truthy(cell(row, "SHOULD FAIL")),
+                print_fluxes=_truthy(cell(row, "PRINT FLUX")),
+                comments=cell(row, "COMMENTS"),
+            )
+            tasks.append(current)
+        if current is None:
+            # Continuation row appearing before any task ID: silently dropping it
+            # used to mask malformed task files. Warn (and skip) so the user sees it.
+            if any(cell(row, c) for c in _DATA_COLS):
+                warnings.warn(
+                    f"{path}: row {row_no} carries task data but no task ID has "
+                    "been seen yet; the row is being skipped.",
+                    stacklevel=2,
+                )
+            continue
+        _add_row(current, row, cell)
+    return tasks
+
+
+def _add_row(task: Task, row: list[str], cell) -> None:
+    if inp := cell(row, "IN"):
+        lb, ub = _num(cell(row, "IN LB"), 0.0), _num(cell(row, "IN UB"), 1000.0)
+        task.inputs += [(m.strip(), lb, ub) for m in inp.split(";") if m.strip()]
+    if out := cell(row, "OUT"):
+        lb, ub = _num(cell(row, "OUT LB"), 0.0), _num(cell(row, "OUT UB"), 1000.0)
+        task.outputs += [(m.strip(), lb, ub) for m in out.split(";") if m.strip()]
+    if equ := cell(row, "EQU"):
+        lb = _num(cell(row, "EQU LB"), -1000.0 if "<=>" in equ else 0.0)
+        ub = _num(cell(row, "EQU UB"), 1000.0)
+        task.equations.append((equ.strip(), lb, ub))
+    if chg := cell(row, "CHANGED RXN"):
+        lb, ub = _num(cell(row, "CHANGED LB"), -1000.0), _num(cell(row, "CHANGED UB"), 1000.0)
+        task.changed += [(r.strip(), lb, ub) for r in chg.split(";") if r.strip()]
diff --git a/src/raven_python/utils/__init__.py b/src/raven_python/utils/__init__.py
new file mode 100644
index 0000000..7127bdd
--- /dev/null
+++ b/src/raven_python/utils/__init__.py
@@ -0,0 +1,16 @@
+"""Shared helpers — GPR linting, elemental balance, model curation checks, id sorting."""
+from raven_python.utils.balance import ElementalBalance, get_elemental_balance
+from raven_python.utils.gpr import GPRIssue, find_non_dnf_grrules, is_dnf
+from raven_python.utils.sort import sort_identifiers
+from raven_python.utils.validate import ModelIssue, check_model
+
+__all__ = [
+    "ElementalBalance",
+    "GPRIssue",
+    "ModelIssue",
+    "check_model",
+    "find_non_dnf_grrules",
+    "get_elemental_balance",
+    "is_dnf",
+    "sort_identifiers",
+]
diff --git a/src/raven_python/utils/balance.py b/src/raven_python/utils/balance.py
new file mode 100644
index 0000000..ee64ab4
--- /dev/null
+++ b/src/raven_python/utils/balance.py
@@ -0,0 +1,89 @@
+"""Check the elemental balance of reactions, distinguishing *unbalanced* from
+*unknown* (missing formula).
+
+cobra's ``reaction.check_mass_balance()`` silently treats a missing formula as
+empty, so a reaction can look "unbalanced" — or even balanced — when the truth is
+that the data is incomplete. This module checks for missing formulas first and
+returns a graded status
+per reaction (``balanced`` / ``unbalanced`` / ``unknown``) plus the element
+imbalance — over a batch, as structured data.
+"""
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+
+import cobra
+
+
+@dataclass(frozen=True)
+class ElementalBalance:
+    """Balance result for one reaction.
+
+    Attributes
+    ----------
+    reaction_id
+        ID of the reaction.
+    status
+        ``"balanced"`` — elements balance;
+        ``"unbalanced"`` — they do not (see ``imbalance``);
+        ``"unknown"`` — the reaction has no metabolites, or at least one has no
+        formula, so balance cannot be determined (cobra would miscount these).
+    imbalance
+        Element → net coefficient (products − reactants), only for
+        ``"unbalanced"``; empty otherwise. Charge is not included.
+    """
+
+    reaction_id: str
+    status: str
+    imbalance: dict[str, float] = field(default_factory=dict)
+
+
+def get_elemental_balance(
+    model: cobra.Model, reactions=None
+) -> list[ElementalBalance]:
+    """Check whether reactions are elementally balanced.
+    Parameters
+    ----------
+    reactions
+        Reaction IDs/objects to check; default all reactions. (Boundary
+        reactions exchange mass with the environment and will read as
+        ``unbalanced`` — filter them out if that is not wanted.)
+
+    Returns
+    -------
+    list of ElementalBalance
+        One entry per checked reaction, in model order.
+    """
+    if reactions is None:
+        rxns = list(model.reactions)
+    else:
+        if isinstance(reactions, (str, cobra.Reaction)):
+            reactions = [reactions]
+        rxns = [
+            r if isinstance(r, cobra.Reaction) else model.reactions.get_by_id(r)
+            for r in reactions
+        ]
+
+    results: list[ElementalBalance] = []
+    for rxn in rxns:
+        if not rxn.metabolites:
+            # A reaction with no metabolites used to fall through to ``balanced``
+            # (vacuously) because ``any()`` over the empty list is False and the
+            # zero-element imbalance dict is empty. Treat the no-formula case
+            # (zero formulae present) as ``unknown``: we can't determine balance
+            # for a reaction without stoichiometry.
+            results.append(ElementalBalance(rxn.id, "unknown"))
+            continue
+        if any(not met.formula for met in rxn.metabolites):
+            results.append(ElementalBalance(rxn.id, "unknown"))
+            continue
+        imbalance = {
+            element: amount
+            for element, amount in rxn.check_mass_balance().items()
+            if element != "charge"
+        }
+        if imbalance:
+            results.append(ElementalBalance(rxn.id, "unbalanced", imbalance))
+        else:
+            results.append(ElementalBalance(rxn.id, "balanced"))
+    return results
diff --git a/src/raven_python/utils/gpr.py b/src/raven_python/utils/gpr.py
new file mode 100644
index 0000000..2e2122d
--- /dev/null
+++ b/src/raven_python/utils/gpr.py
@@ -0,0 +1,119 @@
+"""GPR (gene-protein-reaction rule) linting.
+
+Flag GPRs that are *not* in disjunctive normal form ("OR of AND-complexes"), via cobra's
+GPR AST. GPR syntax *normalisation* is already done by cobra on assignment, so it isn't
+re-implemented here.
+
+Part (2) has no cobrapy equivalent and is ported here, reworked onto cobra's
+GPR AST instead of RAVEN's brittle substring search. The relevant property is
+**disjunctive normal form (DNF)**: an OR of AND-clauses of single genes, e.g.
+``(G1 and G2) or G3``. Rules where an AND contains an OR — e.g.
+``(G1 or G2) and (G3 or G4)`` — are *valid* for cobra but ambiguous for the
+isoenzyme/complex reasoning used across RAVEN/GECKO, and ``expand_model``
+(see :mod:`raven_python.manipulation.expand`) only does something for DNF rules.
+:func:`find_non_dnf_grrules` surfaces them as structured data rather than, as
+RAVEN did, only printing a warning.
+"""
+from __future__ import annotations
+
+import ast
+from dataclasses import dataclass
+
+import cobra
+from cobra.core.gene import GPR
+
+
+def _contains_or(node: ast.AST | None) -> bool:
+    """True if ``node``'s subtree contains an OR operator anywhere."""
+    if isinstance(node, ast.BoolOp):
+        if isinstance(node.op, ast.Or):
+            return True
+        return any(_contains_or(value) for value in node.values)
+    return False
+
+
+def _is_dnf_node(node: ast.AST | None) -> bool:
+    """True if the AST rooted at ``node`` is in disjunctive normal form.
+
+    DNF here means no AND operator has an OR anywhere beneath it, i.e. the
+    rule is a single gene, a pure AND-complex, or an OR of those.
+    """
+    if node is None or isinstance(node, ast.Name):
+        return True
+    if isinstance(node, ast.BoolOp):
+        if isinstance(node.op, ast.And):
+            return not any(_contains_or(value) for value in node.values)
+        # OR: every disjunct must itself be DNF
+        return all(_is_dnf_node(value) for value in node.values)
+    # Unknown node type: don't flag it as a problem.
+    return True
+
+
+def is_dnf(gpr: GPR | str | None) -> bool:
+    """Return whether a GPR is in disjunctive normal form (OR of AND-complexes).
+
+    Parameters
+    ----------
+    gpr
+        A cobra :class:`~cobra.core.gene.GPR`, a grRule string, or ``None``.
+        An empty/``None`` rule is trivially DNF.
+
+    Examples
+    --------
+    >>> is_dnf("(G1 and G2) or G3")
+    True
+    >>> is_dnf("(G1 or G2) and G3")
+    False
+    """
+    if isinstance(gpr, str):
+        gpr = GPR.from_string(gpr)
+    if gpr is None:
+        return True
+    return _is_dnf_node(gpr.body)
+
+
+@dataclass(frozen=True)
+class GPRIssue:
+    """A reaction whose GPR is flagged by the linter (see :func:`find_non_dnf_grrules`).
+
+    Attributes
+    ----------
+    reaction_id
+        ID of the reaction.
+    gpr
+        The reaction's (already cobra-normalised) ``gene_reaction_rule`` string.
+    reason
+        Human-readable explanation of why it was flagged.
+    """
+
+    reaction_id: str
+    gpr: str
+    reason: str
+
+
+_NON_DNF_REASON = (
+    "GPR is not in disjunctive normal form (an AND clause contains an OR). "
+    "Isoenzyme/complex reasoning and expand_model assume an OR of AND-complexes, "
+    'e.g. rewrite "(G1 or G2) and (G3 or G4)" as '
+    '"(G1 and G3) or (G1 and G4) or (G2 and G3) or (G2 and G4)".'
+)
+
+
+def find_non_dnf_grrules(model: cobra.Model) -> list[GPRIssue]:
+    """Find reactions whose GPR is not in disjunctive normal form ("OR of AND-complexes").
+
+    Uses cobra's GPR AST. Reactions with no GPR are skipped.
+
+    Returns
+    -------
+    list of GPRIssue
+        One entry per flagged reaction, in model reaction order. Empty if all
+        GPRs are simple OR-of-AND-complexes.
+    """
+    issues: list[GPRIssue] = []
+    for rxn in model.reactions:
+        if not rxn.gene_reaction_rule:
+            continue
+        if not is_dnf(rxn.gpr):
+            issues.append(GPRIssue(rxn.id, rxn.gene_reaction_rule, _NON_DNF_REASON))
+    return issues
diff --git a/src/raven_python/utils/parse.py b/src/raven_python/utils/parse.py
new file mode 100644
index 0000000..8068f6c
--- /dev/null
+++ b/src/raven_python/utils/parse.py
@@ -0,0 +1,33 @@
+"""Small parsing helpers shared across raven_python."""
+from __future__ import annotations
+
+import re
+
+# A metabolite written as ``name[comp]``. The name is greedy so that, for a
+# pathological name that itself contains brackets, the *last* ``[...]`` is taken
+# as the compartment (matching RAVEN getIndexes' ``max(strfind('['))`` rule).
+_NAME_COMP_RE = re.compile(r"^(?P<name>.+)\[(?P<comp>[^\[\]]+)\]$")
+
+
+def parse_name_comp(token: str) -> tuple[str, str | None]:
+    """Split a ``name[comp]`` token into ``(name, compartment)``.
+
+    This is the one genuinely cobra-absent sliver of RAVEN ``getIndexes``'
+    ``metcomps`` mode and ``addRxns`` eqnType 3: resolving a metabolite written
+    as its *name* plus a compartment in square brackets, e.g. ``"ATP[c]"``.
+
+    Returns ``(name, None)`` when there is no trailing ``[...]``.
+
+    Examples
+    --------
+    >>> parse_name_comp("ATP[c]")
+    ('ATP', 'c')
+    >>> parse_name_comp("ATP")
+    ('ATP', None)
+    >>> parse_name_comp("weird[name][m]")
+    ('weird[name]', 'm')
+    """
+    match = _NAME_COMP_RE.match(token.strip())
+    if match:
+        return match.group("name").strip(), match.group("comp").strip()
+    return token.strip(), None
diff --git a/src/raven_python/utils/sort.py b/src/raven_python/utils/sort.py
new file mode 100644
index 0000000..a8641a8
--- /dev/null
+++ b/src/raven_python/utils/sort.py
@@ -0,0 +1,21 @@
+"""Sort a model's identifiers alphabetically — useful for deterministic,
+diff-friendly output.
+
+cobra's ``DictList.sort`` reorders one list (and rebuilds its lookup index), but
+there is no single "sort the whole model" call; this provides it.
+"""
+from __future__ import annotations
+
+import cobra
+
+
+def sort_identifiers(model: cobra.Model) -> cobra.Model:
+    """Sort reactions, metabolites and genes alphabetically by ID, in place.
+
+    Returns the same (mutated) model for convenience. Compartments are a plain
+    dict and are emitted sorted by writers as needed.
+    """
+    model.reactions.sort(key=lambda r: r.id)
+    model.metabolites.sort(key=lambda m: m.id)
+    model.genes.sort(key=lambda g: g.id)
+    return model
diff --git a/src/raven_python/utils/validate.py b/src/raven_python/utils/validate.py
new file mode 100644
index 0000000..c08df48
--- /dev/null
+++ b/src/raven_python/utils/validate.py
@@ -0,0 +1,86 @@
+"""Curation checks for a model.
+
+A QC bundle cobra has no single call for: orphaned objects, empty reactions,
+duplicated metabolite ``name + compartment``, empty names, and objective sanity.
+:func:`check_model` returns these as structured :class:`ModelIssue` records.
+"""
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+import cobra
+
+
+@dataclass(frozen=True)
+class ModelIssue:
+    """One curation issue found in a model.
+
+    Attributes
+    ----------
+    category
+        Machine-readable kind, e.g. ``"orphan_metabolite"``, ``"empty_reaction"``,
+        ``"orphan_gene"``, ``"duplicate_name_compartment"``,
+        ``"empty_metabolite_name"``, ``"objective"``.
+    object_id
+        ID of the offending object; ``None`` for ``duplicate_name_compartment`` and ``objective``.
+    message
+        Human-readable description.
+    """
+
+    category: str
+    object_id: str | None
+    message: str
+
+
+def check_model(model: cobra.Model) -> list[ModelIssue]:
+    """Run curation checks on a model and return the issues found.
+
+    Does not
+    raise; returns a (possibly empty) list of :class:`ModelIssue`.
+    """
+    issues: list[ModelIssue] = []
+
+    for met in model.metabolites:
+        if not met.reactions:
+            issues.append(
+                ModelIssue("orphan_metabolite", met.id, f"Metabolite {met.id!r} is not used in any reaction.")
+            )
+        if not (met.name and str(met.name).strip()):
+            issues.append(
+                ModelIssue("empty_metabolite_name", met.id, f"Metabolite {met.id!r} has no name.")
+            )
+
+    for gene in model.genes:
+        if not gene.reactions:
+            issues.append(
+                ModelIssue("orphan_gene", gene.id, f"Gene {gene.id!r} is not associated with any reaction.")
+            )
+
+    for rxn in model.reactions:
+        if not rxn.metabolites:
+            issues.append(
+                ModelIssue("empty_reaction", rxn.id, f"Reaction {rxn.id!r} has no metabolites.")
+            )
+
+    by_name_comp: dict[tuple[str, str], list[str]] = {}
+    for met in model.metabolites:
+        by_name_comp.setdefault((met.name, met.compartment), []).append(met.id)
+    for (name, comp), ids in by_name_comp.items():
+        if name and len(ids) > 1:
+            issues.append(
+                ModelIssue(
+                    "duplicate_name_compartment",
+                    None,
+                    f"{len(ids)} metabolites share name {name!r} in compartment {comp!r}: {sorted(ids)}",
+                )
+            )
+
+    objective_rxns = [r.id for r in model.reactions if r.objective_coefficient != 0]
+    if not objective_rxns:
+        issues.append(ModelIssue("objective", None, "No reaction has a nonzero objective coefficient."))
+    elif len(objective_rxns) > 1:
+        issues.append(
+            ModelIssue("objective", None, f"Multiple objective reactions: {sorted(objective_rxns)}")
+        )
+
+    return issues
diff --git a/tests/data/kegg_dump/compound b/tests/data/kegg_dump/compound
new file mode 100644
index 0000000..a78d176
--- /dev/null
+++ b/tests/data/kegg_dump/compound
@@ -0,0 +1,34 @@
+ENTRY       C00001                      Compound
+NAME        H2O;
+            Water
+FORMULA     H2O
+DBLINKS     PubChem: 3303
+            ChEBI: 15377
+///
+ENTRY       C00002                      Compound
+NAME        ATP
+FORMULA     C10H16N5O13P3
+///
+ENTRY       C00003                      Compound
+NAME        NAD+;
+            NAD
+FORMULA     C21H28N7O14P2
+///
+ENTRY       C00006                      Compound
+NAME        NADP+
+FORMULA     C21H29N7O17P3
+///
+ENTRY       C00031                      Compound
+NAME        D-Glucose;
+            Grape sugar
+FORMULA     C6H12O6
+DBLINKS     ChEBI: 4167 17634
+///
+ENTRY       C01083                      Compound
+NAME        alpha,alpha-Trehalose
+FORMULA     C12H22O11
+///
+ENTRY       C00007                      Compound
+NAME        Oxygen
+FORMULA     O2
+///
diff --git a/tests/data/kegg_dump/compound.inchi b/tests/data/kegg_dump/compound.inchi
new file mode 100644
index 0000000..448312f
--- /dev/null
+++ b/tests/data/kegg_dump/compound.inchi
@@ -0,0 +1 @@
+C00031	InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2
diff --git a/tests/data/kegg_dump/genes.pep b/tests/data/kegg_dump/genes.pep
new file mode 100644
index 0000000..f30073d
--- /dev/null
+++ b/tests/data/kegg_dump/genes.pep
@@ -0,0 +1,12 @@
+>bsu:BSU31050 gbsB; choline dehydrogenase
+MKVLAAGGTGYIGSHTVVELLEAGYDVVVLDNLSNGHREAVPKGVPFveqIDLRDREALDR
+>bsu:BSU31060 hypothetical protein
+MKVLAAGGTGYIGSHTVVELLEAGYDVVVLDNLSNGHREAVPKGVPFveqIDLRDREALDX
+>eco:b0001 thrA; aspartokinase
+MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDA
+>hsa:124 ADH1A; alcohol dehydrogenase 1A
+MSTAGKVIKCKAAVLWELKKPFSIEEVEVAPPKAHEVRIKMVATGICRSDDHVVSGTLVT
+>hsa:125 ADH1B; alcohol dehydrogenase 1B
+MSTAGKVIKCKAAVLWEVKKPFSIEDVEVAPPKAHEVRIKMVATGICRSDDHVVSGTLVT
+>xxx:unused some other gene not in any KO
+MAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
diff --git a/tests/data/kegg_dump/ko b/tests/data/kegg_dump/ko
new file mode 100644
index 0000000..f6ae027
--- /dev/null
+++ b/tests/data/kegg_dump/ko
@@ -0,0 +1,14 @@
+ENTRY       K01194                      KO
+NAME        treA, TREH
+DEFINITION  alpha,alpha-trehalase [EC:3.2.1.28]
+GENES       BSU: BSU31050(gbsB) BSU31060
+            HSA: 124 125(ADH)
+///
+ENTRY       K00002                      KO
+DEFINITION  AKR1A1; alcohol dehydrogenase (NADP+) [EC:1.1.1.2]
+GENES       ECO: b0001
+///
+ENTRY       K99999                      KO
+DEFINITION  unlinked ortholog
+GENES       ECO: b9999
+///
diff --git a/tests/data/kegg_dump/reaction b/tests/data/kegg_dump/reaction
new file mode 100644
index 0000000..b0e75c5
--- /dev/null
+++ b/tests/data/kegg_dump/reaction
@@ -0,0 +1,31 @@
+ENTRY       R00010                      Reaction
+NAME        alpha,alpha-trehalose glucohydrolase
+DEFINITION  alpha,alpha-Trehalose + H2O <=> 2 D-Glucose
+EQUATION    C01083 + C00001 <=> 2 C00031
+ENZYME      3.2.1.28
+PATHWAY     rn00500  Starch and sucrose metabolism
+            rn01100  Metabolic pathways
+MODULE      M00599  example module
+ORTHOLOGY   K01194  alpha,alpha-trehalase [EC:3.2.1.28]
+DBLINKS     RHEA: 32678
+///
+ENTRY       R00100                      Reaction
+NAME        spontaneous example
+COMMENT     This reaction is spontaneous.
+EQUATION    C00002 <=> C00003
+ORTHOLOGY   K00002  some enzyme
+///
+ENTRY       R00200                      Reaction
+NAME        undefined stoich example
+EQUATION    C00001 + n C00002 <=> C00003
+///
+ENTRY       R00300                      Reaction
+NAME        general example
+COMMENT     General reaction.
+EQUATION    C00031 <=> C00006
+ORTHOLOGY   K09999  lumped ortholog
+///
+ENTRY       R00400                      Reaction
+NAME        empty after cancellation
+EQUATION    C00007 <=> C00007
+///
diff --git a/tests/data/kegg_dump/reaction_mapformula.lst b/tests/data/kegg_dump/reaction_mapformula.lst
new file mode 100644
index 0000000..0adb8f0
--- /dev/null
+++ b/tests/data/kegg_dump/reaction_mapformula.lst
@@ -0,0 +1,3 @@
+R00010: 00500: C01083 => C00031
+R00010: 00010: C00031 => C01083
+R00100: 00010: C00002 => C00003
diff --git a/tests/data/kegg_dump/taxonomy b/tests/data/kegg_dump/taxonomy
new file mode 100644
index 0000000..f0447e6
--- /dev/null
+++ b/tests/data/kegg_dump/taxonomy
@@ -0,0 +1,10 @@
+# Prokaryotes
+## Bacteria
+### Firmicutes
+T00010	bsu	Bacillus subtilis 168	Bacillus
+### Gammaproteobacteria - Enterobacteria
+T00007	eco	Escherichia coli K-12 MG1655	Escherichia
+# Eukaryotes
+## Animals
+### Vertebrates - Mammals
+T01001	hsa	Homo sapiens (human)	Homo
diff --git a/tests/test_analysis_fseof.py b/tests/test_analysis_fseof.py
new file mode 100644
index 0000000..5f23f3f
--- /dev/null
+++ b/tests/test_analysis_fseof.py
@@ -0,0 +1,112 @@
+"""Tests for FSEOF (analysis/fseof.py, Phase 5)."""
+import cobra
+import pytest
+
+from raven_python.analysis import FSEOFResult, fseof
+
+
+@pytest.fixture
+def model():
+    """S -> I, then I branches to product P (via v2) or biomass B (via v3).
+
+    Enforcing product export (EX_P) should amplify the product branch (v2, EX_P) and
+    suppress the biomass branch (v3), which competes for the shared intermediate I.
+    """
+    m = cobra.Model("cell")
+    S, inter, P, B = (cobra.Metabolite(x, compartment="c") for x in ("S", "I", "P", "B"))
+    m.add_metabolites([S, inter, P, B])
+    sup = cobra.Reaction("sup", lower_bound=0, upper_bound=10)  # -> S (substrate supply)
+    sup.add_metabolites({S: 1})
+    v1 = cobra.Reaction("v1", lower_bound=0, upper_bound=1000)  # S -> I
+    v1.add_metabolites({S: -1, inter: 1})
+    v2 = cobra.Reaction("v2", lower_bound=0, upper_bound=1000)  # I -> P (product branch)
+    v2.add_metabolites({inter: -1, P: 1})
+    v3 = cobra.Reaction("v3", lower_bound=0, upper_bound=1000)  # I -> B (biomass branch)
+    v3.add_metabolites({inter: -1, B: 1})
+    ex_p = cobra.Reaction("EX_P", lower_bound=0, upper_bound=1000)  # target product export
+    ex_p.add_metabolites({P: -1})
+    ex_b = cobra.Reaction("EX_B", lower_bound=0, upper_bound=1000)  # biomass
+    ex_b.add_metabolites({B: -1})
+    m.add_reactions([sup, v1, v2, v3, ex_p, ex_b])
+    v1.gene_reaction_rule = "gA"  # one gene per internal reaction, used by the gene-target test
+    v2.gene_reaction_rule = "gB"
+    v3.gene_reaction_rule = "gC"
+    m.objective = "EX_B"  # the model's own objective is biomass export; tests enforce EX_P
+    return m
+
+
+def test_returns_result_with_scan(model):
+    res = fseof(model, "EX_P", n_steps=8)
+    assert isinstance(res, FSEOFResult)
+    assert res.scan.shape[1] == len(res.enforced) >= 2  # one scan column per enforced level, at least two
+    assert "v2" in res.scan.index  # full scan retained, indexed by reaction
+
+
+def test_amplification_targets(model):
+    res = fseof(model, "EX_P", n_steps=8)
+    amp = set(res.amplification["reaction"])
+    # the product-forming reaction is amplified as EX_P is enforced upward
+    # (v1/sup run at capacity throughout, so they stay constant and aren't flagged).
+    assert {"v2", "EX_P"} <= amp
+    v2 = res.targets.set_index("reaction").loc["v2"]  # v2's row in the combined targets table
+    assert v2["slope"] > 0 and v2["correlation"] > 0.9
+
+
+def test_knockdown_of_competing_branch(model):
+    res = fseof(model, "EX_P", n_steps=8)
+    # v3 (biomass branch) competes for I -> suppressed toward zero -> knockdown/knockout
+    down = set(res.knockout["reaction"])
+    assert "v3" in down
+    v3 = res.targets.set_index("reaction").loc["v3"]  # v3's row in the combined targets table
+    assert v3["slope"] < 0
+    assert v3["target_type"] in ("knockdown", "knockout")  # either label is accepted
+
+
+def test_gene_targets_aggregation(model):
+    res = fseof(model, "EX_P", n_steps=8)
+    genes = set(res.gene_targets["gene"])
+    assert {"gA", "gB", "gC"} & genes  # reaction targets mapped to their genes
+    gB = res.gene_targets.set_index("gene").loc["gB"]  # gB is the gene rule set on v2
+    assert "v2" in gB["reactions"]
+
+
+def test_unproducible_target_raises(model):
+    # A reaction that cannot carry positive flux is not a valid product target.
+    dead = cobra.Reaction("dead", lower_bound=0, upper_bound=0)  # bounds (0, 0): flux pinned to zero
+    dead.add_metabolites({model.metabolites.P: -1})
+    model.add_reactions([dead])
+    with pytest.raises(ValueError, match="cannot carry positive flux"):
+        fseof(model, "dead")
+
+
+def test_infeasible_model_raises_clear_error(model):
+    """An infeasible model (slim_optimize -> NaN) raises the clear guard, not a NaN scan."""
+    model.reactions.sup.bounds = (5, 5)  # force uptake while EX_P demands more -> infeasible
+    model.reactions.EX_P.bounds = (1000, 1000)  # export fixed at 1000, far above the supply of 5
+    with pytest.raises(ValueError, match="cannot carry positive flux"):
+        fseof(model, "EX_P", n_steps=4)
+
+
+# --- regression: slope-based labels (known_issues.md F3) -------------------
+
+def test_amplify_label_uses_abs_slope_not_endpoint_difference():
+    """A reaction whose |flux| trend is upward but whose final value happens
+    to equal the initial (endpoints straddle a peak) should be labelled
+    ``amplify`` by the regression-slope rule, not ``knockdown`` by the old
+    endpoint check."""
+    import numpy as np
+    import pandas as pd
+
+    from raven_python.analysis.fseof import _classify
+
+    # Endpoints equal (0), but the |flux| regression slope is clearly positive
+    # over the scan — the new classifier picks amplify; the old endpoint code
+    # would have said knockdown (final not below eps, abs(final) not > abs(initial)).
+    enforced = np.linspace(0.0, 1.0, 6)
+    flux = np.array([0.0, 0.3, 0.6, 0.9, 0.4, 0.0])  # climbs to 0.9, then falls back to 0
+    scan = pd.DataFrame([flux], index=["r_test"], columns=enforced)  # one row: the synthetic reaction
+    m = cobra.Model("synth")
+    m.add_reactions([cobra.Reaction("r_test")])
+    table = _classify(m, scan, enforced, corr_threshold=0.0, flux_eps=1e-6)
+    assert not table.empty
+    assert table.iloc[0]["target_type"] == "amplify"
diff --git a/tests/test_analysis_reporter.py b/tests/test_analysis_reporter.py
new file mode 100644
index 0000000..918f15d
--- /dev/null
+++ b/tests/test_analysis_reporter.py
@@ -0,0 +1,89 @@
+"""Tests for Reporter Metabolites (analysis/reporter.py, Phase 5)."""
+import cobra
+import pytest
+
+from raven_python.analysis import ReporterResult, reporter_metabolites
+
+
+def _met(mid):  # e.g. "A_c" -> metabolite named "A" (last two characters dropped) in compartment "c"
+    return cobra.Metabolite(mid, name=mid[:-2], compartment="c")
+
+
+@pytest.fixture
+def model():
+    """A-r1(g1)-B-r2(g2)-C-r3(g3); rX touches X but has no gene."""
+    m = cobra.Model("rep")
+    A, B, C, X = _met("A_c"), _met("B_c"), _met("C_c"), _met("X_c")
+    m.add_metabolites([A, B, C, X])
+    r1 = cobra.Reaction("r1")  # A -> B
+    r1.add_metabolites({A: -1, B: 1})
+    r2 = cobra.Reaction("r2")  # B -> C
+    r2.add_metabolites({B: -1, C: 1})
+    r3 = cobra.Reaction("r3")  # C ->
+    r3.add_metabolites({C: -1})
+    rX = cobra.Reaction("rX")  # X -> ; deliberately left without a gene rule
+    rX.add_metabolites({X: -1})
+    m.add_reactions([r1, r2, r3, rX])
+    r1.gene_reaction_rule = "g1"
+    r2.gene_reaction_rule = "g2"
+    r3.gene_reaction_rule = "g3"
+    return m
+
+
+def test_ranks_metabolites_by_surrounding_significance(model):
+    # g1, g2 highly significant; g3 not. B (g1,g2) > A (g1) > C (g2,g3).
+    (res,) = reporter_metabolites(model, {"g1": 0.001, "g2": 0.001, "g3": 0.5})  # exactly one result expected
+    assert isinstance(res, ReporterResult) and res.test == "all"
+    assert list(res.table["metabolite"]) == ["B_c", "A_c", "C_c"]
+    assert res.table["z_score"].is_monotonic_decreasing
+    assert "X_c" not in set(res.table["metabolite"])  # no neighbouring genes -> excluded
+
+
+def test_neighbour_counts(model):
+    (res,) = reporter_metabolites(model, {"g1": 0.01, "g2": 0.01, "g3": 0.01})
+    counts = dict(zip(res.table["metabolite"], res.table["n_genes"], strict=True))
+    assert counts == {"A_c": 1, "B_c": 2, "C_c": 2}  # A: g1; B: g1, g2; C: g2, g3
+
+
+def test_uniform_pvalues_give_zero_scores(model):
+    # All genes identical -> background std 0 -> nothing stands out (corrected z = 0).
+    (res,) = reporter_metabolites(model, {"g1": 0.2, "g2": 0.2, "g3": 0.2})
+    assert (res.table["z_score"] == 0.0).all()
+    assert res.table["p_value"].to_numpy() == pytest.approx(0.5)  # every metabolite's p-value is 0.5
+
+
+def test_p_value_low_for_top_metabolite(model):
+    (res,) = reporter_metabolites(model, {"g1": 1e-6, "g2": 1e-6, "g3": 0.9})
+    top = res.table.iloc[0]  # first row of the result table
+    assert top["metabolite"] == "B_c"
+    assert top["p_value"] < 0.5  # enriched -> significant
+
+
+def test_fold_change_splits_up_down(model):
+    res = reporter_metabolites(
+        model,
+        {"g1": 0.001, "g2": 0.001, "g3": 0.001},
+        gene_fold_changes={"g1": 2.0, "g2": -2.0, "g3": 1.0},
+    )
+    assert [r.test for r in res] == ["all", "up", "down"]  # fold changes add 'up' and 'down' results
+    # 'up' uses g1,g3 -> A(g1) and C(g3) have neighbours; B needs g2 (down) so its
+    # only 'up' neighbour is g1 -> still present. 'down' uses only g2.
+    down = next(r for r in res if r.test == "down").table
+    assert set(down["metabolite"]) <= {"B_c", "C_c"}  # g2 touches B and C
+
+
+def test_filters_unknown_and_nan_genes(model):
+    # gX not in model, gNaN has NaN p-value -> both ignored; result still computed.
+    (res,) = reporter_metabolites(
+        model, {"g1": 0.01, "g2": 0.01, "g3": 0.01, "gX": 0.001, "gNaN": float("nan")}
+    )
+    assert "gX" not in set(model.genes.list_attr("id"))  # sanity: gX really is absent from the model
+    assert len(res.table) == 3  # A, B, C scored from the three real genes
+
+
+def test_out_of_range_pvalue_dropped_not_poisoning(model):
+    """A p-value outside [0,1] is dropped, not propagated as NaN through all scores."""
+    (res,) = reporter_metabolites(model, {"g1": 0.01, "g2": 0.01, "g3": 1.7})  # g3 invalid
+    import numpy as np  # imported locally; this test module has no module-level numpy import
+
+    assert not np.isnan(res.table["z_score"].to_numpy()).any()  # no NaN poisoning
diff --git a/tests/test_analysis_sampling.py b/tests/test_analysis_sampling.py
new file mode 100644
index 0000000..155500b
--- /dev/null
+++ b/tests/test_analysis_sampling.py
@@ -0,0 +1,133 @@
+"""Tests for random-objective flux sampling (analysis/sampling.py)."""
+import cobra
+import numpy as np
+import pytest
+
+from raven_python.analysis import (
+    RandomSamplingResult,
+    find_good_reactions,
+    random_sampling,
+)
+
+
+@pytest.fixture
+def model():
+    """S uptake -> A -> {B export, C export}, plus a thermodynamically infeasible loop.
+
+    sup -> A; A->B (v_b) and A->C (v_c); B,C exported. r_f/r_r form a closed cycle
+    (X<->Y both directions, no in/out) that can spin arbitrarily — a loop whose
+    reactions must be excluded from the random objectives.
+    """
+    m = cobra.Model("toy")
+    A, B, C, X, Y = (cobra.Metabolite(x, compartment="c") for x in "ABCXY")
+    m.add_metabolites([A, B, C, X, Y])
+
+    def rxn(rid, mets, lb=0, ub=1000):
+        r = cobra.Reaction(rid, lower_bound=lb, upper_bound=ub)
+        r.add_metabolites(mets)
+        return r
+
+    rxns = [
+        rxn("sup", {A: 1}, ub=10),       # substrate supply
+        rxn("v_b", {A: -1, B: 1}),       # A -> B
+        rxn("v_c", {A: -1, C: 1}),       # A -> C
+        rxn("EX_B", {B: -1}),            # export B
+        rxn("EX_C", {C: -1}),            # export C
+        rxn("r_f", {X: -1, Y: 1}, lb=-1000),  # X <-> Y  ┐ closed loop
+        rxn("r_r", {Y: -1, X: 1}, lb=-1000),  # Y <-> X  ┘ (no source/sink for X,Y)
+    ]
+    m.add_reactions(rxns)
+    m.objective = "EX_B"
+    return m
+
+
+def test_good_reactions_excludes_loop(model):
+    good = find_good_reactions(model)
+    # The closed X<->Y cycle can spin to the 1000 bound -> excluded.
+    assert "r_f" not in good and "r_r" not in good
+    # Real flux-carrying reactions are kept.
+    assert {"sup", "v_b", "EX_B"} <= set(good)
+
+
+def test_returns_result_shape(model):
+    res = random_sampling(model, n_samples=20, seed=1)
+    assert isinstance(res, RandomSamplingResult)
+    assert res.samples.shape == (20, len(model.reactions))
+    assert list(res.samples.columns) == [r.id for r in model.reactions]
+    assert "r_f" not in res.good_reactions
+
+
+def test_samples_are_steady_state(model):
+    """Every sample must satisfy S·v = 0 (mass balance)."""
+    res = random_sampling(model, n_samples=15, seed=2)
+    s_matrix = cobra.util.create_stoichiometric_matrix(model)
+    ids = [r.id for r in model.reactions]
+    for _, row in res.samples.iterrows():
+        residual = s_matrix @ row[ids].to_numpy()
+        assert np.allclose(residual, 0, atol=1e-6)
+
+
+def test_samples_respect_bounds(model):
+    res = random_sampling(model, n_samples=15, seed=3)
+    for r in model.reactions:
+        col = res.samples[r.id].to_numpy()
+        assert (col >= r.lower_bound - 1e-6).all()
+        assert (col <= r.upper_bound + 1e-6).all()
+
+
+def test_seed_is_reproducible(model):
+    a = random_sampling(model, n_samples=10, seed=42).samples
+    b = random_sampling(model, n_samples=10, seed=42).samples
+    assert np.allclose(a.to_numpy(), b.to_numpy())
+
+
+def test_good_reactions_reused(model):
+    """Passing good_reactions back in reproduces the FVA-derived set without recomputing."""
+    good = find_good_reactions(model)
+    res = random_sampling(model, n_samples=5, good_reactions=good, seed=0)
+    assert res.good_reactions == good
+
+
+def test_min_flux_runs(model):
+    res = random_sampling(model, n_samples=8, min_flux=True, seed=5)
+    assert res.samples.shape == (8, len(model.reactions))
+
+
+def test_diverse_samples(model):
+    """Random objectives should explore different states, not a single FBA optimum."""
+    res = random_sampling(model, n_samples=40, seed=7)
+    # The branch split A->B vs A->C should vary across samples.
+    assert res.samples["v_b"].std() > 1e-6
+    assert res.samples["v_c"].std() > 1e-6
+
+
+def test_rejects_bad_n_samples(model):
+    with pytest.raises(ValueError, match="n_samples"):
+        random_sampling(model, n_samples=0)
+
+
+def test_too_few_good_reactions(model):
+    with pytest.raises(ValueError, match="usable reactions"):
+        random_sampling(model, n_samples=5, good_reactions=["sup"], n_objectives=2)
+
+
+def test_good_reactions_keeps_reactions_at_default_bound():
+    """A legitimate reaction reaching the model's 1000 bound is not dropped as a loop.
+
+    Regression: the old loop_bound>=1000 test wrongly excluded any reaction that
+    reaches the default bound. Loopless FVA keeps it (real flux). This model has no
+    closed loop; loop exclusion itself is checked in test_good_reactions_excludes_loop.
+    """
+    m = cobra.Model("b")
+    a, b = (cobra.Metabolite(x, compartment="c") for x in "ab")
+    m.add_metabolites([a, b])
+    sup = cobra.Reaction("sup", lower_bound=0, upper_bound=1000)  # uptake to the 1000 cap
+    sup.add_metabolites({a: 1})
+    conv = cobra.Reaction("conv", lower_bound=0, upper_bound=1000)
+    conv.add_metabolites({a: -1, b: 1})
+    ex = cobra.Reaction("EX_b", lower_bound=0, upper_bound=1000)
+    ex.add_metabolites({b: -1})
+    m.add_reactions([sup, conv, ex])
+    m.objective = "EX_b"
+    good = find_good_reactions(m)
+    assert {"sup", "conv", "EX_b"} <= set(good)  # all reach 1000 but are real, not loops
diff --git a/tests/test_binaries.py b/tests/test_binaries.py
new file mode 100644
index 0000000..d74ce0b
--- /dev/null
+++ b/tests/test_binaries.py
@@ -0,0 +1,80 @@
+"""Tests for raven_python.binaries (binary resolution + bundled-ZIP provisioning)."""
+import hashlib
+import shutil
+import zipfile
+from pathlib import Path
+
+import pytest
+
+from raven_python import binaries
+
+
+def test_resolve_explicit_path():
+    assert binaries.resolve_binary("blastp", binary="/opt/x/blastp") == "/opt/x/blastp"
+
+
+def test_resolve_env_var(monkeypatch):
+    monkeypatch.setenv("RAVEN_PYTHON_DIAMOND", "/custom/diamond")
+    assert binaries.resolve_binary("diamond") == "/custom/diamond"
+
+
+@pytest.mark.skipif(not shutil.which("blastp"), reason="blastp not installed")
+def test_resolve_via_path():
+    assert binaries.resolve_binary("blastp") == shutil.which("blastp")
+
+
+def test_resolve_unresolvable_raises(monkeypatch):
+    monkeypatch.setattr(shutil, "which", lambda _: None)
+    with pytest.raises(FileNotFoundError, match="Could not find"):
+        binaries.resolve_binary("diamond")  # empty registry, not on PATH
+
+
+def test_platform_key_format():
+    key = binaries.platform_key()
+    assert "-" in key  # expected shape: "<os>-<arch>"
+    os_part, arch = key.split("-", 1)
+    assert os_part and arch  # both halves non-empty; the OS name is not pinned
+
+
+def test_ensure_binary_downloads_verifies_extracts(tmp_path, monkeypatch):
+    # Build a fake bundle ZIP containing an executable, served via file:// URL.
+    exe = tmp_path / "footool"
+    exe.write_text("#!/bin/sh\necho hi\n")
+    archive = tmp_path / "footool.zip"
+    with zipfile.ZipFile(archive, "w") as zf:
+        zf.write(exe, "footool")
+    sha = hashlib.sha256(archive.read_bytes()).hexdigest()
+
+    registry = {
+        "footool": {
+            "version": "1.0",
+            "provides": ["footool"],
+            "platforms": {binaries.platform_key(): {"url": archive.as_uri(), "sha256": sha}},
+        }
+    }
+    monkeypatch.setenv("XDG_CACHE_HOME", str(tmp_path / "cache"))
+
+    path = binaries.ensure_binary("footool", registry=registry)
+    assert Path(path).exists()
+    assert Path(path).name == "footool"
+    # second call resolves to the same path (that it skips the download is not asserted here)
+    assert binaries.ensure_binary("footool", registry=registry) == path
+
+
+def test_ensure_binary_sha_mismatch(tmp_path, monkeypatch):
+    archive = tmp_path / "x.zip"
+    with zipfile.ZipFile(archive, "w") as zf:
+        zf.writestr("footool", "data")
+    registry = {
+        "footool": {"version": "1", "provides": ["footool"],
+                    "platforms": {binaries.platform_key(): {"url": archive.as_uri(), "sha256": "deadbeef"}}}
+    }
+    monkeypatch.setenv("XDG_CACHE_HOME", str(tmp_path / "cache"))
+    with pytest.raises(ValueError, match="SHA256 mismatch"):
+        binaries.ensure_binary("footool", registry=registry)
+
+
+def test_ensure_binary_unhosted_platform_raises():
+    registry = {"footool": {"version": "1", "provides": ["footool"], "platforms": {}}}
+    with pytest.raises(FileNotFoundError, match="No bundled"):
+        binaries.ensure_binary("footool", registry=registry)  # no entry for this platform
diff --git a/tests/test_change_grrules.py b/tests/test_change_grrules.py
new file mode 100644
index 0000000..d33f723
--- /dev/null
+++ b/tests/test_change_grrules.py
@@ -0,0 +1,49 @@
+"""Tests for change_gene_reaction_rules (changeGrRules port)."""
+import cobra
+import pytest
+
+from raven_python.manipulation import add_reactions_from_equations, change_gene_reaction_rules
+
+
+@pytest.fixture
+def model():
+    m = cobra.Model("t")
+    m.add_metabolites(
+        [cobra.Metabolite("a_c", compartment="c"), cobra.Metabolite("b_c", compartment="c")]
+    )
+    add_reactions_from_equations(
+        m,
+        [
+            {"id": "R1", "equation": "a_c --> b_c", "gene_reaction_rule": "G1"},
+            {"id": "R2", "equation": "a_c --> b_c"},
+        ],
+    )
+    return m
+
+
+def test_replace_rule_and_create_genes(model):
+    (rxn,) = change_gene_reaction_rules(model, {"R1": "G2 and G3"})
+    assert rxn.gene_reaction_rule == "G2 and G3"
+    assert {g.id for g in rxn.genes} == {"G2", "G3"}
+    assert {"G2", "G3"} <= {g.id for g in model.genes}
+
+
+def test_append_rule(model):
+    change_gene_reaction_rules(model, {"R1": "G4"}, replace=False)
+    # (G1) or (G4), normalised by cobra
+    assert model.reactions.get_by_id("R1").gene_reaction_rule == "G1 or G4"
+
+
+def test_append_when_empty_is_just_new(model):
+    change_gene_reaction_rules(model, {"R2": "G5"}, replace=False)
+    assert model.reactions.get_by_id("R2").gene_reaction_rule == "G5"
+
+
+def test_batch(model):
+    changed = change_gene_reaction_rules(model, {"R1": "GA", "R2": "GB"})
+    assert [r.id for r in changed] == ["R1", "R2"]
+
+
+def test_unknown_reaction_errors(model):
+    with pytest.raises(ValueError, match="not found"):
+        change_gene_reaction_rules(model, {"NOPE": "G1"})
diff --git a/tests/test_comparison.py b/tests/test_comparison.py
new file mode 100644
index 0000000..26a20f0
--- /dev/null
+++ b/tests/test_comparison.py
@@ -0,0 +1,123 @@
+"""Tests for comparison/compare.py — N-model comparison (Phase 5)."""
+from __future__ import annotations
+
+import cobra
+import pytest
+
+from raven_python.comparison import ModelComparison, compare_models
+from raven_python.tasks import Task
+
+
+def _mk(model_id: str, reactions: list[tuple[str, dict[str, int], str | None]],
+        genes: list[str] | None = None) -> cobra.Model:
+    """Build a model from (rid, stoich, subsystem) triples and optional per-reaction GPR rules."""
+    m = cobra.Model(model_id)
+    mets: dict[str, cobra.Metabolite] = {}
+    for _rid, stoich, _ in reactions:
+        for mid in stoich:
+            if mid not in mets:
+                mets[mid] = cobra.Metabolite(mid, name=mid.split("_")[0], compartment="c")
+                m.add_metabolites([mets[mid]])
+    for (rid, stoich, sub), gpr in zip(reactions, genes or [None] * len(reactions), strict=True):
+        r = cobra.Reaction(rid, lower_bound=-1000, upper_bound=1000)
+        r.add_metabolites({mets[mid]: c for mid, c in stoich.items()})
+        if sub is not None:
+            r.subsystem = sub
+        if gpr is not None:
+            r.gene_reaction_rule = gpr
+        m.add_reactions([r])
+    return m
+
+
+@pytest.fixture
+def two_models():
+    """Two models sharing r1/r2 but each with one unique reaction; different subsystems."""
+    a = _mk("A", [("r1", {"A_c": -1, "B_c": 1}, "carbo"),
+                  ("r2", {"B_c": -1, "C_c": 1}, "amino"),
+                  ("r3", {"C_c": -1, "D_c": 1}, "carbo")],
+            genes=["g1", "g2", "g3"])
+    b = _mk("B", [("r1", {"A_c": -1, "B_c": 1}, "carbo"),
+                  ("r2", {"B_c": -1, "C_c": 1}, "amino"),
+                  ("r4", {"B_c": -1, "E_c": 1}, "lipid")],
+            genes=["g1", "g2", "g4"])
+    return [a, b]
+
+
+def test_returns_dataclass(two_models):
+    res = compare_models(two_models)
+    assert isinstance(res, ModelComparison)
+    assert res.model_ids == ["A", "B"]
+
+
+def test_reactions_matrix_shape_and_values(two_models):
+    res = compare_models(two_models)
+    # union = {r1, r2, r3, r4}; both have r1+r2, only A has r3, only B has r4.
+    assert set(res.reactions.index) == {"r1", "r2", "r3", "r4"}
+    assert res.reactions.loc["r1", "A"] == 1 and res.reactions.loc["r1", "B"] == 1
+    assert res.reactions.loc["r3", "A"] == 1 and res.reactions.loc["r3", "B"] == 0
+    assert res.reactions.loc["r4", "A"] == 0 and res.reactions.loc["r4", "B"] == 1
+
+
+def test_metabolites_and_genes_union(two_models):
+    res = compare_models(two_models)
+    assert set(res.metabolites.index) == {"A_c", "B_c", "C_c", "D_c", "E_c"}
+    assert set(res.genes.index) == {"g1", "g2", "g3", "g4"}
+    assert res.genes.loc["g3", "A"] == 1 and res.genes.loc["g3", "B"] == 0
+
+
+def test_subsystems_counts(two_models):
+    res = compare_models(two_models)
+    # A: carbo=2 (r1+r3), amino=1; B: carbo=1, amino=1, lipid=1.
+    assert res.subsystems.loc["carbo", "A"] == 2
+    assert res.subsystems.loc["carbo", "B"] == 1
+    assert res.subsystems.loc["lipid", "B"] == 1
+    assert res.subsystems.loc["lipid", "A"] == 0
+
+
+def test_subsystems_empty_falls_under_none():
+    a = _mk("A", [("r1", {"X_c": -1, "Y_c": 1}, None)])
+    b = _mk("B", [("r1", {"X_c": -1, "Y_c": 1}, "")])
+    res = compare_models([a, b])
+    assert res.subsystems.loc["(none)", "A"] == 1
+    assert res.subsystems.loc["(none)", "B"] == 1
+
+
+def test_jaccard_similarity_diagonal_and_symmetry(two_models):
+    res = compare_models(two_models)
+    # Diagonal = 1 (self vs self).
+    assert res.similarity.loc["A", "A"] == 1.0
+    assert res.similarity.loc["B", "B"] == 1.0
+    # Symmetric.
+    assert res.similarity.loc["A", "B"] == res.similarity.loc["B", "A"]
+    # Shared r1+r2; total union 4 → Jaccard 2/4 = 0.5.
+    assert res.similarity.loc["A", "B"] == pytest.approx(0.5)
+
+
+def test_tasks_optional_and_passed_through(two_models):
+    """Only B can make E (via r4) → B passes the make-E task and A fails it."""
+    # Add a sink so E can be excreted (otherwise it accumulates → infeasible at steady state).
+    for m in two_models:
+        if "E_c" in [x.id for x in m.metabolites]:
+            m.add_boundary(m.metabolites.get_by_id("E_c"), type="demand")
+    res = compare_models(two_models, tasks=[
+        Task(id="make_E", inputs=[("A[c]", 0.0, 1000.0)], outputs=[("E[c]", 1.0, 1.0)]),
+    ])
+    assert res.tasks is not None
+    assert list(res.tasks.index) == ["make_E"]
+    # Only B has r4 (which makes E), so only B passes.
+    assert bool(res.tasks.loc["make_E", "B"]) is True
+    assert bool(res.tasks.loc["make_E", "A"]) is False
+
+
+def test_duplicate_or_missing_model_id_disambiguated():
+    """Two models with the same id (or empty id) should get distinct labels."""
+    a = _mk("", [("r1", {"X_c": -1, "Y_c": 1}, None)])
+    b = _mk("", [("r1", {"X_c": -1, "Y_c": 1}, None)])
+    res = compare_models([a, b])
+    assert res.model_ids[0] == "model_0"
+    assert res.model_ids[1] != "model_0"      # disambiguated
+
+
+def test_rejects_single_model(two_models):
+    with pytest.raises(ValueError, match="needs .*2"):
+        compare_models(two_models[:1])
diff --git a/tests/test_data.py b/tests/test_data.py
new file mode 100644
index 0000000..714c3a9
--- /dev/null
+++ b/tests/test_data.py
@@ -0,0 +1,89 @@
+"""Tests for ensure_data (data.py). Uses file:// URLs to avoid the network."""
+import hashlib
+
+import pytest
+
+from raven_python.data import (
+    CORE_KEGG_FILES,
+    ensure_data_file,
+    ensure_kegg_data,
+)
+
+
+def _sha256(data: bytes) -> str:
+    return hashlib.sha256(data).hexdigest()
+
+
+@pytest.fixture
+def served(tmp_path, monkeypatch):
+    """A fake registry served from local files, with the cache pointed at tmp."""
+    src = tmp_path / "src"
+    src.mkdir()
+    payloads = {
+        "reference_model.yml.gz": b"!!omap model bytes",
+        "ko_reaction.tsv.gz": b"ko\treaction\n",
+        "ko_names.tsv.gz": b"ko\tname\n",
+        "organism_gene_ko.tsv.xz": b"organism\tgene\tko\n",
+        "rxn_flags.tsv.gz": b"reaction\tspontaneous\n",
+    }
+    files = {}
+    for name, data in payloads.items():
+        path = src / name
+        path.write_bytes(data)
+        files[name] = {"url": path.as_uri(), "sha256": _sha256(data)}
+    registry = {"kegg": {"version": "v1", "files": files}}
+
+    cache = tmp_path / "cache"
+    monkeypatch.setenv("XDG_CACHE_HOME", str(cache))
+    return registry, cache, payloads
+
+
+def test_ensure_data_file_downloads_and_caches(served):
+    registry, cache, payloads = served
+    path = ensure_data_file("kegg", "ko_reaction.tsv.gz", registry=registry)
+    assert path == cache / "raven_python" / "data" / "kegg-v1" / "ko_reaction.tsv.gz"
+    assert path.read_bytes() == payloads["ko_reaction.tsv.gz"]
+
+
+def test_ensure_data_file_reuses_cache(served, monkeypatch):
+    registry, _, _ = served
+    first = ensure_data_file("kegg", "ko_names.tsv.gz", registry=registry)
+    # Break the URL: a second call must hit the cache, not re-download.
+    registry["kegg"]["files"]["ko_names.tsv.gz"]["url"] = "file:///nonexistent"
+    second = ensure_data_file("kegg", "ko_names.tsv.gz", registry=registry)
+    assert first == second and second.exists()
+
+
+def test_sha256_mismatch_rejected(served):
+    registry, cache, _ = served
+    registry["kegg"]["files"]["rxn_flags.tsv.gz"]["sha256"] = "0" * 64
+    with pytest.raises(ValueError, match="SHA256 mismatch"):
+        ensure_data_file("kegg", "rxn_flags.tsv.gz", registry=registry)
+    # The corrupt partial download must not be left behind.
+    assert not (cache / "raven_python" / "data" / "kegg-v1" / "rxn_flags.tsv.gz").exists()
+
+
+def test_unknown_dataset_actionable_error(served):
+    registry, _, _ = served
+    with pytest.raises(FileNotFoundError, match="No data artefacts registered"):
+        ensure_data_file("metacyc", "x", registry=registry)
+
+
+def test_unknown_file_lists_available(served):
+    registry, _, _ = served
+    with pytest.raises(FileNotFoundError, match="not registered"):
+        ensure_data_file("kegg", "missing.tsv.gz", registry=registry)
+
+
+def test_ensure_kegg_data_fetches_core_set(served):
+    registry, cache, _ = served
+    out = ensure_kegg_data(registry=registry)
+    assert out == cache / "raven_python" / "data" / "kegg-v1"
+    for name in CORE_KEGG_FILES:
+        assert (out / name).is_file()
+
+
+def test_empty_registry_raises():
+    # The shipped registry is empty until artefacts are published.
+    with pytest.raises(FileNotFoundError, match="No data artefacts registered"):
+        ensure_data_file("kegg", "ko_reaction.tsv.gz")
diff --git a/tests/test_gapfilling.py b/tests/test_gapfilling.py
new file mode 100644
index 0000000..b92e982
--- /dev/null
+++ b/tests/test_gapfilling.py
@@ -0,0 +1,109 @@
+"""Tests for connectivity gap-filling (gapfilling/fill.py, Phase 4b)."""
+import cobra
+import pytest
+
+from raven_python.gapfilling import GapFillResult, connect_blocked_reactions
+
+
+def _met(mid):
+    return cobra.Metabolite(mid, name=mid, compartment="c")
+
+
+@pytest.fixture
+def draft_and_template():
+    """Draft: EX_A -> A -> B (r1), but B has no consumer, so r1 is blocked.
+
+    Template supplies B -> C (r2) and an exchange for C, which unblocks r1.
+    """
+    A, B = _met("A_c"), _met("B_c")
+    draft = cobra.Model("draft")
+    exa = cobra.Reaction("EX_A", lower_bound=-10, upper_bound=1000)
+    exa.add_metabolites({A: 1})
+    r1 = cobra.Reaction("r1", lower_bound=0, upper_bound=1000)  # A -> B, irreversible
+    r1.add_metabolites({A: -1, B: 1})
+    draft.add_reactions([exa, r1])
+
+    template = cobra.Model("template")
+    r2 = cobra.Reaction("r2", lower_bound=0, upper_bound=1000)  # B -> C
+    r2.add_metabolites({_met("B_c"): -1, _met("C_c"): 1})
+    exc = cobra.Reaction("EX_C", lower_bound=-1000, upper_bound=1000)
+    exc.add_metabolites({_met("C_c"): -1})
+    extra = cobra.Reaction("r_unneeded", lower_bound=0, upper_bound=1000)  # D -> E, irrelevant
+    extra.add_metabolites({_met("D_c"): -1, _met("E_c"): 1})
+    template.add_reactions([r2, exc, extra])
+    return draft, template
+
+
+# --------------------------------------------------------------------------- #
+# Connectivity gap-fill
+# --------------------------------------------------------------------------- #
+def test_fill_gaps_connects_blocked_reaction(draft_and_template):
+    draft, template = draft_and_template
+    assert "r1" in cobra.flux_analysis.find_blocked_reactions(draft)  # precondition
+
+    res = connect_blocked_reactions(draft, template)
+    assert isinstance(res, GapFillResult)
+    assert "r1" in res.newly_connected
+    assert set(res.added_reactions) == {"r2", "EX_C"}  # both needed to drain B
+    assert "r_unneeded" not in res.added_reactions  # irrelevant template rxn not added
+
+
+def test_fill_gaps_returns_working_model_that_unblocks(draft_and_template):
+    draft, template = draft_and_template
+    res = connect_blocked_reactions(draft, template)
+    assert {"r2", "EX_C"} <= {r.id for r in res.model.reactions}
+    assert "r1" not in cobra.flux_analysis.find_blocked_reactions(res.model)
+    # original draft is untouched
+    assert "r2" not in {r.id for r in draft.reactions}
+
+
+def test_fill_gaps_nothing_to_do_when_unblocked(draft_and_template):
+    draft, template = draft_and_template
+    # give the draft its own drain so r1 is not blocked
+    drain = cobra.Reaction("EX_B", lower_bound=-1000, upper_bound=1000)
+    drain.add_metabolites({draft.metabolites.B_c: -1})
+    draft.add_reactions([drain])
+    res = connect_blocked_reactions(draft, template)
+    assert res.added_reactions == []
+    assert res.newly_connected == []
+
+
+def test_fill_gaps_scores_prefer_higher_scored_reactions():
+    # Two alternative single-reaction drains for B; scores should pick the preferred one.
+    A, B = _met("A_c"), _met("B_c")
+    draft = cobra.Model("draft")
+    exa = cobra.Reaction("EX_A", lower_bound=-10, upper_bound=1000)
+    exa.add_metabolites({A: 1})
+    r1 = cobra.Reaction("r1", lower_bound=0, upper_bound=1000)
+    r1.add_metabolites({A: -1, B: 1})
+    draft.add_reactions([exa, r1])
+    template = cobra.Model("t")
+    d1 = cobra.Reaction("drain1", lower_bound=-1000, upper_bound=1000)
+    d1.add_metabolites({_met("B_c"): -1})
+    d2 = cobra.Reaction("drain2", lower_bound=-1000, upper_bound=1000)
+    d2.add_metabolites({_met("B_c"): -1})
+    template.add_reactions([d1, d2])
+    # Scores are penalties (higher = preferred = cheaper to include); only one drain
+    # is needed, so the less-penalised drain1 is chosen.
+    res = connect_blocked_reactions(draft, template, scores={"drain1": -1.0, "drain2": -5.0})
+    assert res.added_reactions == ["drain1"]
+
+
+def test_unconnectable_reaction_reported_not_added():
+    # A blocked irreversible reaction that no template can connect: reported, no adds.
+    A, B = _met("A_c"), _met("B_c")
+    draft = cobra.Model("draft")
+    exa = cobra.Reaction("EX_A", lower_bound=-10, upper_bound=1000)
+    exa.add_metabolites({A: 1})
+    r1 = cobra.Reaction("r1", lower_bound=0, upper_bound=1000)  # A -> B, B has no drain
+    r1.add_metabolites({A: -1, B: 1})
+    draft.add_reactions([exa, r1])
+    template = cobra.Model("t")  # offers nothing that can drain B
+    noise = cobra.Reaction("noise", lower_bound=0, upper_bound=1000)
+    noise.add_metabolites({_met("X_c"): -1, _met("Y_c"): 1})
+    template.add_reactions([noise])
+
+    res = connect_blocked_reactions(draft, template)
+    assert res.added_reactions == []
+    assert res.newly_connected == []
+    assert "r1" in res.cannot_connect
diff --git a/tests/test_init.py b/tests/test_init.py
new file mode 100644
index 0000000..a61ee19
--- /dev/null
+++ b/tests/test_init.py
@@ -0,0 +1,110 @@
+"""Tests for the INIT MILP (init/init.py, Phase 4c)."""
+import cobra
+import pytest
+
+from raven_python.init import InitResult, run_init
+
+
+def _met(mid):
+    return cobra.Metabolite(mid, name=mid[:-2] if mid.endswith("_c") else mid, compartment="c")
+
+
+@pytest.fixture
+def model():
+    """EX_A -> A -(r1)-> B -(r2)-> C -(r3)-> D, with A uptake and excretion allowed.
+
+    Scores are passed per test (typically r1, r2 positive and r3 negative); D has no drain.
+    """
+    m = cobra.Model("net")
+    A, B, C, D = _met("A_c"), _met("B_c"), _met("C_c"), _met("D_c")
+    m.add_metabolites([A, B, C, D])
+    exa = cobra.Reaction("EX_A", lower_bound=-1000, upper_bound=1000)
+    exa.add_metabolites({A: -1})  # negative flux = uptake of A
+    r1 = cobra.Reaction("r1", lower_bound=0, upper_bound=1000)
+    r1.add_metabolites({A: -1, B: 1})
+    r2 = cobra.Reaction("r2", lower_bound=0, upper_bound=1000)
+    r2.add_metabolites({B: -1, C: 1})
+    r3 = cobra.Reaction("r3", lower_bound=0, upper_bound=1000)
+    r3.add_metabolites({C: -1, D: 1})
+    m.add_reactions([exa, r1, r2, r3])
+    return m
+
+
+def test_keeps_positive_drops_negative(model):
+    scores = {"r1": 1.0, "r2": 1.0, "r3": -1.0}
+    res = run_init(model, scores, prod_weight=0.0, allow_excretion=True)
+    assert isinstance(res, InitResult)
+    kept = {r.id for r in res.model.reactions}
+    assert {"r1", "r2"} <= kept  # positive-score, flux-consistent -> kept
+    assert "r3" in res.deleted_reactions  # negative score -> removed
+    assert "r3" not in kept
+
+
+def test_negative_scores_emptied_when_no_reward(model):
+    # All reactions negative and no production reward -> keep nothing (empty optimum).
+    scores = {r.id: -1.0 for r in model.reactions}
+    res = run_init(model, scores, prod_weight=0.0, allow_excretion=True)
+    assert res.deleted_reactions == sorted(r.id for r in model.reactions)
+    assert len(res.model.reactions) == 0
+
+
+def test_essential_reaction_forced_kept(model):
+    # r3 is negative-scored but essential -> must be kept despite the penalty.
+    scores = {"r1": 1.0, "r2": 1.0, "r3": -1.0}
+    res = run_init(model, scores, essential_rxns=["r3"], prod_weight=0.0, allow_excretion=True)
+    kept = {r.id for r in res.model.reactions}
+    assert "r3" in kept
+    assert "r3" not in res.deleted_reactions
+
+
+def test_prod_weight_pulls_in_connectivity(model):
+    # With everything scored 0, no reward -> empty. With prod_weight>0, producing
+    # metabolites is rewarded, so flux-carrying reactions are pulled in.
+    zero = {r.id: 0.0 for r in model.reactions}
+    empty = run_init(model, zero, prod_weight=0.0, allow_excretion=True)
+    assert len(empty.model.reactions) == 0
+    pulled = run_init(model, zero, prod_weight=0.5, allow_excretion=True)
+    assert len(pulled.model.reactions) > 0
+
+
+def test_present_mets_reports_producibility(model):
+    scores = {"r1": 1.0, "r2": 1.0}
+    res = run_init(
+        model, scores, present_mets=["C", "Z"], prod_weight=0.0, allow_excretion=True
+    )
+    assert res.met_production["C"] is True   # A->B->C is producible
+    assert res.met_production["Z"] is False  # not in the model
+
+
+def test_objective_returned(model):
+    res = run_init(model, {"r1": 1.0, "r2": 1.0, "r3": -1.0}, prod_weight=0.0, allow_excretion=True)
+    assert res.objective == pytest.approx(2.0)  # kept r1(+1) + r2(+1), dropped r3
+
+
+def test_reversible_essential_keeps_productive_path():
+    """A reversible essential reaction must not be forced into a phantom fwd+rev loop.
+
+    SRC -> a, R: a <=> b (reversible, essential), SNK: b ->. Forcing R essential
+    should keep the productive path SRC->R->SNK, not delete SRC/SNK and leave R
+    self-looping (the bug from forcing eps flux through both split directions).
+    """
+    m = cobra.Model("revess")
+    a, b = (cobra.Metabolite(x, compartment="c") for x in "ab")
+    m.add_metabolites([a, b])
+    src = cobra.Reaction("SRC", lower_bound=0, upper_bound=1000)
+    src.add_metabolites({a: 1})
+    r = cobra.Reaction("R", lower_bound=-1000, upper_bound=1000)
+    r.add_metabolites({a: -1, b: 1})
+    snk = cobra.Reaction("SNK", lower_bound=0, upper_bound=1000)
+    snk.add_metabolites({b: -1})
+    m.add_reactions([src, r, snk])
+    m.objective = "SNK"
+
+    # SRC and SNK are scored negatively, so nothing but the flux forced through the
+    # essential reaction R can justify keeping them.
+    res = run_init(m, {"SRC": -1.0, "SNK": -1.0}, essential_rxns=["R"], prod_weight=0.0)
+    kept = {rxn.id for rxn in res.model.reactions}
+    assert "R" in kept
+    # The productive path must be kept (SRC feeds R, SNK drains it); R can't self-loop.
+    assert {"SRC", "SNK"} <= kept
+    assert res.model.slim_optimize() > 1e-6  # the kept model actually carries flux
diff --git a/tests/test_init_build.py b/tests/test_init_build.py
new file mode 100644
index 0000000..cbc566f
--- /dev/null
+++ b/tests/test_init_build.py
@@ -0,0 +1,132 @@
+"""Tests for tINIT scoring + get_init_model (init/score.py, init/build.py)."""
+import math
+
+import cobra
+import pytest
+
+from raven_python.init import (
+    InitModelResult,
+    gene_scores_from_expression,
+    get_init_model,
+    score_reactions_from_genes,
+)
+
+
+# --------------------------------------------------------------------------- #
+# score_reactions_from_genes
+# --------------------------------------------------------------------------- #
+@pytest.fixture
+def gpr_model():
+    m = cobra.Model("g")
+    a = cobra.Metabolite("a_c", compartment="c")
+    b = cobra.Metabolite("b_c", compartment="c")
+    m.add_metabolites([a, b])
+    r_complex = cobra.Reaction("r_complex")  # (g1 and g2) or g3
+    r_complex.add_metabolites({a: -1, b: 1})
+    m.add_reactions([r_complex])
+    r_complex.gene_reaction_rule = "(g1 and g2) or g3"
+    r_nogene = cobra.Reaction("r_nogene")
+    r_nogene.add_metabolites({b: -1})
+    m.add_reactions([r_nogene])
+    return m
+
+
+def test_score_isozyme_max_complex_min(gpr_model):
+    # (g1 and g2) or g3 -> max(min(1, 4), 3) = max(1, 3) = 3
+    scores = score_reactions_from_genes(gpr_model, {"g1": 1.0, "g2": 4.0, "g3": 3.0})
+    assert scores["r_complex"] == 3.0
+
+
+def test_score_no_gene_reaction_gets_default(gpr_model):
+    scores = score_reactions_from_genes(gpr_model, {"g1": 1, "g2": 1, "g3": 1}, no_gene_score=-2.0)
+    assert scores["r_nogene"] == -2.0
+
+
+def test_score_missing_genes_omitted(gpr_model):
+    # g2 missing -> complex (g1 and g2) collapses to g1=1; OR with g3=3 -> max(1,3)=3
+    scores = score_reactions_from_genes(gpr_model, {"g1": 1.0, "g3": 3.0})
+    assert scores["r_complex"] == 3.0
+    # all genes missing -> falls back to no_gene_score (expected default: -2.0)
+    assert score_reactions_from_genes(gpr_model, {})["r_complex"] == -2.0
+
+
+def test_score_invalid_method(gpr_model):
+    with pytest.raises(ValueError, match="isozyme_scoring"):
+        score_reactions_from_genes(gpr_model, {}, isozyme_scoring="nonsense")
+
+
+# --------------------------------------------------------------------------- #
+# gene_scores_from_expression (RNA-seq path)
+# --------------------------------------------------------------------------- #
+def test_expression_scores_sign_and_clamp():
+    expr = {"hi": 100.0, "lo": 1.0, "mid": 10.0, "zero": 0.0}
+    ref = 10.0  # threshold/reference
+    s = gene_scores_from_expression(expr, ref)
+    assert s["hi"] == pytest.approx(min(5 * math.log(10), 10.0))  # above ref -> positive
+    assert s["lo"] == pytest.approx(max(5 * math.log(0.1), -5.0))  # below ref -> negative
+    assert s["mid"] == pytest.approx(0.0)  # at ref -> 0
+    assert s["zero"] == -5.0  # non-positive -> floor
+
+
+def test_expression_per_gene_reference():
+    expr = {"g": 20.0}
+    s = gene_scores_from_expression(expr, {"g": 5.0})
+    assert s["g"] == pytest.approx(5 * math.log(4))
+
+
+# --------------------------------------------------------------------------- #
+# get_init_model pipeline
+# --------------------------------------------------------------------------- #
+@pytest.fixture
+def model():
+    m = cobra.Model("net")
+    A, B, C, D = (cobra.Metabolite(x, name=x[:-2], compartment="c") for x in ("A_c", "B_c", "C_c", "D_c"))
+    m.add_metabolites([A, B, C, D])
+    exa = cobra.Reaction("EX_A", lower_bound=-1000, upper_bound=1000)
+    exa.add_metabolites({A: -1})
+    r1 = cobra.Reaction("r1", lower_bound=0, upper_bound=1000)
+    r1.add_metabolites({A: -1, B: 1})
+    r2 = cobra.Reaction("r2", lower_bound=0, upper_bound=1000)
+    r2.add_metabolites({B: -1, C: 1})
+    r3 = cobra.Reaction("r3", lower_bound=0, upper_bound=1000)
+    r3.add_metabolites({C: -1, D: 1})
+    m.add_reactions([exa, r1, r2, r3])
+    for r, rule in (("r1", "g1"), ("r2", "g2"), ("r3", "g3")):
+        m.reactions.get_by_id(r).gene_reaction_rule = rule
+    return m
+
+
+def test_get_init_model_from_gene_scores(model):
+    # g1,g2 expressed (positive), g3 not (negative) -> keep r1,r2, drop r3.
+    res = get_init_model(model, gene_scores={"g1": 5.0, "g2": 5.0, "g3": -5.0}, prod_weight=0.0)
+    assert isinstance(res, InitModelResult)
+    kept = {r.id for r in res.model.reactions}
+    assert {"r1", "r2"} <= kept
+    assert "r3" not in kept
+    assert res.reaction_scores["r1"] == 5.0
+
+
+def test_get_init_model_requires_one_score_source(model):
+    with pytest.raises(ValueError, match="exactly one"):
+        get_init_model(model)
+    with pytest.raises(ValueError, match="exactly one"):
+        get_init_model(model, rxn_scores={}, gene_scores={})
+
+
+def test_get_init_model_essential_kept(model):
+    # r3 negative-scored but essential -> kept.
+    res = get_init_model(
+        model, rxn_scores={"r1": 1, "r2": 1, "r3": -1}, essential_rxns=["r3"], prod_weight=0.0
+    )
+    assert "r3" in {r.id for r in res.model.reactions}
+
+
+def test_get_init_model_removes_dead_ends(model):
+    # An isolated reaction that can never carry flux is dropped as a dead end.
+    X, Y = cobra.Metabolite("X_c", compartment="c"), cobra.Metabolite("Y_c", compartment="c")
+    dead = cobra.Reaction("dead", lower_bound=0, upper_bound=1000)
+    dead.add_metabolites({X: -1, Y: 1})  # X has no source, Y no sink (no exchange)
+    model.add_reactions([dead])
+    res = get_init_model(model, rxn_scores={"r1": 1, "r2": 1}, prod_weight=0.0)
+    assert "dead" in res.deleted_dead_end_reactions
+    assert "dead" not in {r.id for r in res.model.reactions}
diff --git a/tests/test_init_ftinit.py b/tests/test_init_ftinit.py
new file mode 100644
index 0000000..58f0542
--- /dev/null
+++ b/tests/test_init_ftinit.py
@@ -0,0 +1,139 @@
+"""Phase 4d.3: the single-step ftINIT MILP (run_ftinit).
+
+Validated on the testModel oracle against (a) a hand-checked score-optimal solution,
+(b) the formulation invariants, and (c) exact agreement with the already-tested
+run_init. The full-pipeline RAVEN outputs (tinitTests T0001/T0002) additionally
+involve linear merge + the toIgnore masks + staging + exchange re-adding, layered on
+in 4d.2/4d.3b/4d.5.
+
+Note on the toy result: with strict mass balance and no metabolite-production reward
+(ftINIT, unlike classic INIT, only rewards metabolomics-detected mets), the
+score-optimal subnetwork on testModel is the internal cycle R4→R6→(R10 rev)→(R9 rev),
+worth 7+0.5-3+3.5 = 8.0 — it beats the "honest" exchange path because that path must
+pay for the negative-score transport reactions R2/R7. The bare INIT MILP has no
+loopless constraint (neither does RAVEN's); loop-free models come from the staged
+pipeline + exchange handling and, at genome scale, from models having real exchanges
+so such cycles are not score-optimal. This faithfully matches RAVEN's MILP.
+"""
+import cobra
+import pytest
+from tinit_oracles import TEST_MODEL_SCORES, expr_for_rxn_score, make_test_model
+
+from raven_python.init import FtInitResult, run_ftinit, run_init
+from raven_python.init.score import gene_scores_from_expression, score_reactions_from_genes
+
+_LOOP = {"R4", "R6", "R9", "R10"}  # the score-optimal subnetwork (8.0)
+
+
+def _scores(model):
+    gene_scores = gene_scores_from_expression(expr_for_rxn_score(TEST_MODEL_SCORES), 1.0)
+    return score_reactions_from_genes(model, gene_scores)  # looked up by reaction id
+
+
+def test_full_milp_score_optimum():
+    """The bare MILP keeps exactly the score-8.0 loop and deletes the other six reactions."""
+    toy = make_test_model()
+    outcome = run_ftinit(toy, _scores(toy))
+    assert isinstance(outcome, FtInitResult) and set(outcome.kept_reactions) == _LOOP
+    assert outcome.deleted_reactions == ["R1", "R2", "R3", "R5", "R7", "R8"]
+    assert outcome.objective == pytest.approx(8.0, abs=1e-6)
+
+
+def test_kept_reactions_carry_flux_and_balance():
+    """Indicator-on reactions carry flux (≥ force_on) and the extracted model is solvable."""
+    model = make_test_model()
+    res = run_ftinit(model, _scores(model))
+    for rid in res.kept_reactions:
+        assert abs(res.fluxes[rid]) > 1e-9
+    # slim_optimize() yields NaN (never None) when infeasible, so check the solver status.
+    assert res.model.optimize().status == "optimal"
+
+
+def test_agrees_with_run_init():
+    """run_ftinit and the classic run_init MILP pick the same subnetwork and objective.
+
+    The comparison uses matching settings (prod_weight=0, no_rev_loops=True): without
+    no_rev_loops, run_init splits reversibles and double-scores both directions.
+    """
+    toy = make_test_model()
+    rxn_scores = _scores(toy)
+    ft_result = run_ftinit(toy, rxn_scores)
+    classic = run_init(toy, rxn_scores, prod_weight=0.0, eps=0.1, no_rev_loops=True)
+    assert {rxn.id for rxn in classic.model.reactions} == set(ft_result.kept_reactions)
+    assert ft_result.objective == pytest.approx(classic.objective, abs=1e-6)
+
+
+def test_essential_force_clamps_to_capacity():
+    """An essential force above a reaction's capacity is clamped (no lb>ub crash).
+
+    LOW is capped at 0.05: forcing it with the default 0.1 must not error, and LOW then
+    carries at least its capacity (0.05) while the model stays feasible. A per-reaction
+    force of 0.04 is honoured as the minimum flux through LOW.
+    """
+    net = cobra.Model("cap")
+    a, b = (cobra.Metabolite(x, compartment="s") for x in "ab")
+    net.add_metabolites([a, b])
+    low = cobra.Reaction("LOW", lower_bound=0, upper_bound=0.05)  # tiny capacity
+    low.add_metabolites({a: -1, b: 1})
+    for ex_id, met in (("EX_a", a), ("EX_b", b)):
+        boundary = cobra.Reaction(ex_id, lower_bound=-1000, upper_bound=1000)
+        boundary.add_metabolites({met: -1})
+        net.add_reactions([boundary])
+    net.add_reactions([low])
+    net.objective = "LOW"
+
+    clamped = run_ftinit(net, {}, essential_rxns=["LOW"], force_on_ess=0.1)  # 0.1 > capacity
+    assert clamped.fluxes["LOW"] >= 0.05 - 1e-9
+    explicit = run_ftinit(net, {}, essential_rxns=["LOW"], essential_force={"LOW": 0.04})
+    assert explicit.fluxes["LOW"] >= 0.04 - 1e-9
+
+
+def test_essential_reaction_forced_on():
+    """Declaring R3 essential keeps it, with non-zero flux, despite its negative score."""
+    toy = make_test_model()
+    forced = "R3"
+    result = run_ftinit(toy, _scores(toy), essential_rxns=[forced])
+    assert forced in result.kept_reactions and abs(result.fluxes[forced]) > 1e-6
+
+
+def test_rem_pos_rev_drops_positive_reversibles():
+    """With rem_pos_rev, positive reversibles are unscored and the 8.0 loop stops paying.
+
+    R4 (+7) and R10 (+3.5) are the positive reversibles. Once they score 0, the rest of
+    the cycle nets R6 (0.5) + R9 (-3) < 0, so no scored reaction is worth keeping.
+    """
+    toy = make_test_model()
+    result = run_ftinit(toy, _scores(toy), rem_pos_rev=True)
+    assert result.objective == pytest.approx(0.0, abs=1e-6)
+    assert not {"R6", "R9"} & set(result.kept_reactions)
+
+
+def test_allow_excretion_relaxes_balance():
+    """Relaxing the balance with allow_excretion must not lower the objective below 8.0."""
+    toy = make_test_model()
+    relaxed = run_ftinit(toy, _scores(toy), allow_excretion=True)
+    assert relaxed.objective >= 8.0 - 1e-6  # 8.0 is the strict-balance optimum
+
+
+def test_unscored_reactions_are_kept_free():
+    """A reaction whose score is exactly 0 counts as unscored and is never deleted."""
+    toy = make_test_model()
+    rxn_scores = _scores(toy)
+    rxn_scores["R3"] = 0.0  # a zero score marks R3 as unscored
+    result = run_ftinit(toy, rxn_scores)
+    assert "R3" not in result.deleted_reactions
+
+
+def test_forced_flux_lower_bound_is_respected():
+    """A positive lower bound on a scored irreversible reaction survives the MILP.
+
+    Regression guard for bound handling: the single-direction branch has to work with
+    the model's own [lb, ub] rather than resetting a positive lower bound to zero.
+    """
+    toy = make_test_model()
+    rxn_scores = _scores(toy)
+    # R6 (2 d[c] => e[c]) only runs forward; require at least 2 units of flux through it.
+    r6 = toy.reactions.get_by_id("R6")
+    r6.lower_bound = 2.0
+    result = run_ftinit(toy, rxn_scores)
+    assert result.fluxes["R6"] >= 2.0 - 1e-6 and "R6" not in result.deleted_reactions
diff --git a/tests/test_init_genes.py b/tests/test_init_genes.py
new file mode 100644
index 0000000..862ca17
--- /dev/null
+++ b/tests/test_init_genes.py
@@ -0,0 +1,71 @@
+"""Phase 4d.5: remove_low_score_genes — the three RAVEN docstring oracle cases.
+
+Scores use distinct values to avoid the random tie-break RAVEN mentions when all
+isozyme alternatives are negative.
+"""
+import cobra
+
+from raven_python.init import remove_low_score_genes
+
+
+def _model(rule: str) -> cobra.Model:
+    """One-reaction model (R: a -> b, both in compartment 'c') whose GPR is `rule`."""
+    substrate, product = (cobra.Metabolite(mid, compartment="c") for mid in "ab")
+    reaction = cobra.Reaction("R", lower_bound=0, upper_bound=1000)
+    reaction.add_metabolites({substrate: -1, product: 1})
+    model = cobra.Model("g")
+    model.add_reactions([reaction])
+    reaction.gene_reaction_rule = rule  # set once the reaction belongs to the model
+    return model
+
+
+def _norm(rule: str) -> str:
+    """Round-trip `rule` through cobra so comparisons ignore ordering and parentheses."""
+    return _model(rule).reactions.get_by_id("R").gene_reaction_rule
+
+
+def _result(rule: str, scores: dict) -> str:
+    pruned_model, _removed = remove_low_score_genes(_model(rule), scores)
+    return pruned_model.reactions.get_by_id("R").gene_reaction_rule  # GPR after pruning
+
+
+def test_case1_isozyme_vs_complex():
+    """Rule `G1 or (G2 and G3 and G4)` with G1, G2 negative: the complex is what survives."""
+    # The complex scores as its minimum (G2 = -1), which is less negative than G1 = -2.
+    gene_scores = {"G1": -2.0, "G2": -1.0, "G3": 1.0, "G4": 1.0}
+    assert _norm("G2 and G3 and G4") == _result("G1 or (G2 and G3 and G4)", gene_scores)
+
+
+def test_case2_two_complexes():
+    """Rule `G1 or (G2 and G3) or (G4 and G5)` with G1, G2 negative: only G4-G5 remains."""
+    gene_scores = {"G1": -1.0, "G2": -1.0, "G3": 1.0, "G4": 1.0, "G5": 1.0}
+    assert _norm("G4 and G5") == _result("G1 or (G2 and G3) or (G4 and G5)", gene_scores)
+
+
+def test_case3_nested_isozyme_in_complex():
+    """Inside the complex `G1 and (G2 or G3) and G4`, the negative isozyme G2 is pruned."""
+    gene_scores = {"G1": 1.0, "G2": -1.0, "G3": 1.0, "G4": 1.0}
+    assert _norm("G1 and G3 and G4") == _result("G1 and (G2 or G3) and G4", gene_scores)
+
+
+def test_complex_subunit_not_removed_individually():
+    """In a pure complex (`G1 and G2`) a negative subunit stays: the rule is unchanged."""
+    rule = "G1 and G2"
+    assert _result(rule, {"G1": 1.0, "G2": -1.0}) == _norm(rule)
+
+
+def test_single_negative_gene_kept():
+    """Even a negative-scored gene stays when it is the reaction's sole gene."""
+    assert _result(rule="G1", scores={"G1": -5.0}) == "G1"
+
+
+def test_unscored_genes_not_removed():
+    """A gene missing from the score map counts as unscored, so pruning leaves it alone."""
+    only_g1_scored = {"G1": -1.0}
+    assert _norm("G2") == _result("G1 or G2", only_g1_scored)  # negative G1 is dropped
+
+
+def test_removed_genes_reported_and_pruned():
+    # The dropped gene is reported in the return value and gone from the model's genes.
+    pruned, dropped = remove_low_score_genes(_model("G1 or G2"), {"G1": -1.0, "G2": 1.0})
+    assert dropped == ["G1"] and all(gene.id != "G1" for gene in pruned.genes)
diff --git a/tests/test_init_merge.py b/tests/test_init_merge.py
new file mode 100644
index 0000000..f6fea8a
--- /dev/null
+++ b/tests/test_init_merge.py
@@ -0,0 +1,109 @@
+"""Phase 4d.2: linear reaction merging (merge_linear + group_rxn_scores).
+
+Oracles: RAVEN tinitTests T0004. testModel merges {R1,R2},{R3,R5},{R4,R6},{R7,R8},
+{R9,R10}; testModel4 merges {R5,R6},{R7,R8},{R9,R10} with two reactions flipped.
+"""
+import pytest
+from tinit_oracles import (
+    TEST_MODEL4_GROUP_IDS,
+    TEST_MODEL4_MERGED_REV,
+    TEST_MODEL4_REVERSED_RXNS,
+    TEST_MODEL_GROUP_IDS,
+    TEST_MODEL_GROUPED_SCORES,
+    TEST_MODEL_MERGED_LB,
+    TEST_MODEL_MERGED_REV,
+    TEST_MODEL_SCORES,
+    make_test_model,
+    make_test_model4,
+)
+
+from raven_python.init import group_rxn_scores, merge_linear
+
+
+def test_test_model_group_ids():
+    _reduced, original_ids, groups, _flipped = merge_linear(make_test_model())
+    assert original_ids == [f"R{n}" for n in range(1, 11)]  # R1..R10, original order
+    assert groups == TEST_MODEL_GROUP_IDS  # oracle: [1,1,2,3,2,3,4,4,5,5]
+
+
+def test_test_model_reduced_shape():
+    reduced, *_ = merge_linear(make_test_model())
+    lower_bounds = [rxn.lower_bound for rxn in reduced.reactions]
+    assert [rxn.id for rxn in reduced.reactions] == ["R1", "R3", "R4", "R7", "R9"]
+    assert [int(lb < 0) for lb in lower_bounds] == TEST_MODEL_MERGED_REV
+    assert lower_bounds == TEST_MODEL_MERGED_LB  # five survivors, in original order
+
+
+def test_test_model_grouped_scores():
+    reduced, original_ids, groups, _flipped = merge_linear(make_test_model())
+    per_rxn = dict(zip(original_ids, TEST_MODEL_SCORES, strict=True))
+    zeroed = {"R1", "R2", "R8"}
+    grouped = group_rxn_scores(reduced, per_rxn, original_ids, groups, to_zero=zeroed)
+    # Oracle, in reduced-reaction order: [0,-0.5,7.5,-1,0.5].
+    assert [grouped[r.id] for r in reduced.reactions] == pytest.approx(TEST_MODEL_GROUPED_SCORES)
+
+
+def test_test_model4_group_ids_and_flips():
+    reduced, original_ids, groups, was_flipped = merge_linear(make_test_model4())
+    assert groups == TEST_MODEL4_GROUP_IDS  # oracle: [0,0,0,0,1,1,2,2,3,3,0]
+    assert [int(rxn.lower_bound < 0) for rxn in reduced.reactions] == TEST_MODEL4_MERGED_REV
+    flipped_ids = {rid for rid, flag in zip(original_ids, was_flipped, strict=True) if flag}
+    assert flipped_ids == set(TEST_MODEL4_REVERSED_RXNS)  # oracle: {R6, R9}
+
+
+def test_merge_preserves_feasible_space():
+    """Merging keeps the export route open: the reduced model can still carry that flux.
+
+    merge_linear drops genes and the objective (ftINIT derives its own from scores), so
+    the test installs one on the surviving export. R8 (e[s]=>) was folded into R7
+    (group 4), which makes R7 the reduced model's export reaction.
+    """
+    full = make_test_model()
+    assert full.slim_optimize() > 1e-9  # the original exports e through R8
+    merged, *_ = merge_linear(full)
+    merged.objective = "R7"
+    assert merged.slim_optimize() > 1e-9
+
+
+def test_no_merge_blocks_merging():
+    """Reactions listed in no_merge are never contracted and stay in group 0."""
+    _reduced, original_ids, groups, _flipped = merge_linear(make_test_model(), no_merge=["R2"])
+    group_of = dict(zip(original_ids, groups, strict=True))
+    # R2 is protected; R1 was only mergeable with R2, so both remain unmerged.
+    for rid in ("R2", "R1"):
+        assert group_of[rid] == 0
+
+
+def test_multipass_chain_collapses_to_one_group():
+    """The linear chain A→X→Y→Z is contracted into one reaction across several passes.
+
+    X (r1, r2) and Y (r2, r3) are degree-2; A and Z are degree-1 and retained. After X
+    is merged, Y becomes degree-2 with the survivor and is picked up on a later pass,
+    so all three reactions share one group and only the net A→Z reaction remains.
+    """
+    import cobra
+
+    chain = cobra.Model("chain")
+    A, X, Y, Z = (cobra.Metabolite(i, name=i, compartment="c") for i in "AXYZ")
+    chain.add_metabolites([A, X, Y, Z])
+    for rid, stoich in (("r1", {A: -1, X: 1}), ("r2", {X: -1, Y: 1}), ("r3", {Y: -1, Z: 1})):
+        step = cobra.Reaction(rid, lower_bound=0, upper_bound=1000)
+        step.add_metabolites(stoich)
+        chain.add_reactions([step])
+
+    reduced, _original_ids, groups, _flipped = merge_linear(chain)
+    assert len(reduced.reactions) == 1  # only the net A -> Z reaction is left
+    assert len(set(groups)) == 1 and groups[0] != 0  # one shared, non-zero group
+    net = reduced.reactions[0]
+    assert {met.id: coeff for met, coeff in net.metabolites.items()} == {"A": -1.0, "Z": 1.0}
+
+
+def test_group_scores_zero_handling():
+    """Zero scores get 0.01 each; a nonzero group that cancels to zero is reset to 0.01."""
+    reduced, original_ids, groups, _flipped = merge_linear(make_test_model())
+    # Everything scores 0 except the {R3,R5} group, whose +1 and -1 cancel exactly.
+    per_rxn = dict.fromkeys(original_ids, 0.0)
+    per_rxn.update(R3=1.0, R5=-1.0)
+    grouped = group_rxn_scores(reduced, per_rxn, original_ids, groups)
+    assert grouped["R3"] == pytest.approx(0.01)  # cancelled group rescued
+    assert grouped["R4"] == pytest.approx(0.02)  # {R4,R6}: two genuine zeros, 0.01 + 0.01
diff --git a/tests/test_init_oracles.py b/tests/test_init_oracles.py
new file mode 100644
index 0000000..3f3e52d
--- /dev/null
+++ b/tests/test_init_oracles.py
@@ -0,0 +1,64 @@
+"""Validate the ftINIT toy oracles and that our scoring reproduces RAVEN's.
+
+This is Phase 4d.0: the correctness scaffold. The on/off-output oracles for the
+(ft)INIT MILP live in tinit_oracles as constants and are exercised by the later
+sub-phases' tests; here we lock down the scoring pieces they build on — the score→
+expression inversion and scoreComplexModel-equivalent scoring (RAVEN tinitTests
+T0009).
+"""
+import pytest
+from tinit_oracles import (
+    TEST_MODEL4_SCORES,
+    TEST_MODEL5_SCORES,
+    TEST_MODEL_SCORES,
+    expr_for_rxn_score,
+    make_test_model,
+    make_test_model4,
+    make_test_model5,
+)
+
+from raven_python.init.score import gene_scores_from_expression, score_reactions_from_genes
+
+
+@pytest.mark.parametrize(
+    ("make_model", "scores"),
+    [
+        (make_test_model, TEST_MODEL_SCORES),
+        (make_test_model4, TEST_MODEL4_SCORES),
+        (make_test_model5, TEST_MODEL5_SCORES),
+    ],
+)
+def test_scoring_reproduces_defined_scores(make_model, scores):
+    """RAVEN T0009: scores → expression → gene scores → reaction scores is a round trip."""
+    toy = make_model()
+    # Invert the scoring to expression levels, then score the model from those levels.
+    gene_scores = gene_scores_from_expression(expr_for_rxn_score(scores), 1.0)
+    by_reaction = score_reactions_from_genes(toy, gene_scores)
+    recovered = [by_reaction[rxn.id] for rxn in toy.reactions]
+    assert recovered == pytest.approx(scores, abs=1e-10)
+
+
+def test_expr_for_rxn_score_inverts_scoring():
+    """Scores map to levels exp(score/5); scoring those as 5·ln(level/1) returns them."""
+    wanted = [-5, -1, 0.5, 7, 10]
+    gene_scores = gene_scores_from_expression(expr_for_rxn_score(wanted), 1.0)
+    round_trip = [gene_scores[f"G{n}"] for n in range(1, len(wanted) + 1)]
+    assert round_trip == pytest.approx(wanted)
+
+
+def test_test_model_structure():
+    """Sanity-check the toy model: size, gene-less reactions, reversibles, objective."""
+    toy = make_test_model()
+    assert (len(toy.reactions), len(toy.metabolites)) == (10, 8)
+    gene_less = {rxn.id for rxn in toy.reactions if not rxn.genes}
+    reversible = {rxn.id for rxn in toy.reactions if rxn.lower_bound < 0}
+    assert gene_less == {"R1", "R2", "R8"}  # the reactions scored -2 (no gene)
+    assert reversible == {"R2", "R3", "R4", "R9", "R10"}
+    assert toy.objective.expression.as_coefficients_dict()  # objective set (R8)
+
+
+def test_test_model_is_feasible_for_the_task():
+    """With R8 as objective the optimum is positive: the toy can make e[s] from a[s]."""
+    toy = make_test_model()
+    toy.objective = "R8"
+    assert toy.slim_optimize() > 1e-6
diff --git a/tests/test_init_pipeline.py b/tests/test_init_pipeline.py
new file mode 100644
index 0000000..bf2a2ac
--- /dev/null
+++ b/tests/test_init_pipeline.py
@@ -0,0 +1,161 @@
+"""Phase 4d.3b: the staged ftINIT pipeline (prep_init_model + get_init_steps + ftinit).
+
+Oracles: RAVEN tinitTests T0001/T0002 on testModel with the default '1+1' schedule.
+"""
+
+from tinit_oracles import (
+    TEST_MODEL_FTINIT_NO_TASKS,
+    TEST_MODEL_FTINIT_SPONT_R7_R10,
+    TEST_MODEL_FTINIT_WITH_TASK,
+    TEST_MODEL_SCORES,
+    TEST_MODEL_TASK_ESSENTIAL_MERGED,
+    expr_for_rxn_score,
+    make_test_model,
+    make_test_task,
+)
+
+from raven_python.init import (
+    classify_reactions,
+    ftinit,
+    get_init_steps,
+    prep_init_model,
+    score_reactions_from_genes,
+)
+from raven_python.init.score import gene_scores_from_expression
+
+
+def _scores(model):
+    """Reaction scores for `model`, computed from the TEST_MODEL_SCORES oracle."""
+    gene_scores = gene_scores_from_expression(expr_for_rxn_score(TEST_MODEL_SCORES), 1.0)
+    return score_reactions_from_genes(model, gene_scores)
+
+
+# --------------------------------------------------------------------------- #
+# classify_reactions (the toIgnore masks) — tinitTests T0001 mask oracle.
+# --------------------------------------------------------------------------- #
+def test_classify_exchange_and_transport():
+    categories = classify_reactions(make_test_model(), ext_comp="s")
+    assert categories.exchange == {"R1", "R8"}  # the boundary reactions
+    assert categories.no_gpr == {"R1", "R2", "R8"}
+    # R2 (a[s] <=> a[c]) has no GPR and is an import; R7 has a GPR, so it is not one.
+    assert categories.import_rxns == {"R2"} and "R7" not in categories.import_rxns
+
+
+def test_classify_spontaneous():
+    found = classify_reactions(make_test_model(), ext_comp="s", spontaneous=["R7", "R10"])
+    assert found.exchange.union(found.spontaneous) == {"R1", "R7", "R8", "R10"}
+
+
+def test_get_init_steps_default():
+    # The default '1+1' schedule has two steps; 'full' collapses to a single one.
+    schedule = get_init_steps("1+1")
+    assert len(schedule) == 2
+    first, second = schedule
+    assert (first.how_to_use_prev, first.ignore_mask) == ("ignore", (1, 1, 1, 1, 1, 1, 1, 0))
+    assert (second.how_to_use_prev, second.ignore_mask) == ("essential", (1, 0, 0, 0, 1, 0, 0, 0))
+    assert len(get_init_steps("full")) == 1
+
+
+# --------------------------------------------------------------------------- #
+# Full '1+1' pipeline — T0001 (no tasks) and T0002 (with task).
+# --------------------------------------------------------------------------- #
+def test_ftinit_no_tasks_matches_oracle():
+    """T0001: '1+1' on testModel without tasks yields {R1,R4,R6,R8,R9,R10}."""
+    toy = make_test_model()
+    prepared = prep_init_model(toy, ext_comp="s")
+    extracted = ftinit(prepared, _scores(toy))
+    assert set(TEST_MODEL_FTINIT_NO_TASKS) == {rxn.id for rxn in extracted.reactions}
+
+
+def test_ftinit_with_spontaneous_matches_oracle():
+    """T0001 variant: with R7, R10 spontaneous the R2/R7 path wins, {R1,R2,R4,R6,R7,R8}."""
+    toy = make_test_model()
+    prepared = prep_init_model(toy, ext_comp="s", spontaneous=["R7", "R10"])
+    extracted = ftinit(prepared, _scores(toy))
+    assert set(TEST_MODEL_FTINIT_SPONT_R7_R10) == {rxn.id for rxn in extracted.reactions}
+
+
+def test_ftinit_with_task_matches_oracle():
+    """T0002: with the task 'make e[s] from a[s]' the result is {R1,R2,R4,R6,R7,R8,R9,R10}."""
+    toy = make_test_model()
+    prepared = prep_init_model(toy, [make_test_task()], ext_comp="s")
+    # The task makes R2 and R7 essential; they surface under merged ids {R1, R7} (T0002).
+    assert prepared.essential_rxns == set(TEST_MODEL_TASK_ESSENTIAL_MERGED)
+    extracted = ftinit(prepared, _scores(toy))
+    assert {rxn.id for rxn in extracted.reactions} == set(TEST_MODEL_FTINIT_WITH_TASK)
+
+
+def test_full_series_runs():
+    """The single-step 'full' series runs end to end and returns a non-empty model."""
+    toy = make_test_model()
+    prepared = prep_init_model(toy, ext_comp="s")
+    extracted = ftinit(prepared, _scores(toy), series="full")
+    assert len(extracted.reactions) >= 1
+
+
+def test_pipeline_with_gene_scores_and_tasks_wires_up():
+    """Passing gene_scores and tasks together to the pipeline still reproduces T0002.
+
+    Every GPR in the toy is a single gene (nothing to prune) and the extracted model can
+    already perform the task (nothing to gap-fill), so the reaction set must be the
+    T0002 one. Only the integration wiring is checked here; pruning and gap-filling
+    themselves are unit-tested in test_init_genes / test_init_taskfill.
+    """
+    toy = make_test_model()
+    gene_scores = gene_scores_from_expression(expr_for_rxn_score(TEST_MODEL_SCORES), 1.0)
+    prepared = prep_init_model(toy, [make_test_task()], ext_comp="s")
+    extracted = ftinit(prepared, _scores(toy), gene_scores=gene_scores)
+    assert {rxn.id for rxn in extracted.reactions} == set(TEST_MODEL_FTINIT_WITH_TASK)
+
+
+def test_orient_forward_reverses_a_reversible_reaction():
+    """_orient_forward: -1 flips stoichiometry and bounds; either sign ends with lb = 0."""
+    import cobra
+
+    from raven_python.init.prep import _orient_forward
+
+    net = cobra.Model("o")
+    a, b = (cobra.Metabolite(x, compartment="s") for x in "ab")
+    net.add_metabolites([a, b])
+    rev = cobra.Reaction("R", lower_bound=-800, upper_bound=1000)
+    rev.add_metabolites({a: -1, b: 2})  # a <=> 2 b
+    net.add_reactions([rev])
+
+    _orient_forward(rev, -1)  # forced reverse: rewritten to run forward as 2 b => a
+    assert rev.bounds == (0, 800)  # [-800,1000] → flipped [-1000,800] → lb = 0
+    assert {met.id: coeff for met, coeff in rev.metabolites.items()} == {"a": 1, "b": -2}
+
+    forward = cobra.Reaction("F", lower_bound=-500, upper_bound=900)
+    forward.add_metabolites({a: -1})
+    net.add_reactions([forward])
+    _orient_forward(forward, 1)  # forced forward: the lower bound is clamped to 0
+    assert forward.bounds == (0, 900)
+
+
+def test_essential_merged_away_is_skipped():
+    """prep_init_model tolerates an essential reaction whose merge group disappears.
+
+    REV sits between two exchanges, so merging folds all three into a trivial source→sink
+    reaction that is removed; with no surviving group member, REV must simply be skipped.
+    """
+    import cobra
+
+    from raven_python.tasks import Task
+
+    net = cobra.Model("collapse")
+    a, b = (cobra.Metabolite(x, name=x, compartment="s") for x in "ab")
+    net.add_metabolites([a, b])
+    rev = cobra.Reaction("REV", lower_bound=-1000, upper_bound=1000)
+    rev.add_metabolites({a: -1, b: 1})
+    rev.gene_reaction_rule = "g1"
+    boundaries = []
+    for met in (a, b):
+        boundary = cobra.Reaction(f"EX_{met.id}", lower_bound=-1000, upper_bound=1000)
+        boundary.add_metabolites({met: -1})
+        boundaries.append(boundary)
+    net.add_reactions([rev, *boundaries])
+    net.objective = "REV"
+    make_a = Task(id="mk_a", inputs=[("b[s]", 0.0, 1000.0)], outputs=[("a[s]", 1.0, 1.0)])
+
+    prepared = prep_init_model(net, [make_a], ext_comp="s")  # must not raise
+    assert "REV" not in prepared.essential_rxns  # its merge group collapsed away
diff --git a/tests/test_init_solvers.py b/tests/test_init_solvers.py
new file mode 100644
index 0000000..514c408
--- /dev/null
+++ b/tests/test_init_solvers.py
@@ -0,0 +1,149 @@
+"""Cross-solver smoke tests for the (f)tINIT MILP path.
+
+The clean-data calibration and robustness studies were run on Gurobi; the tractability
+choices (big-M=100, MIP gap, time limits) and the Gurobi-specific param plumbing
+(``opt.problem.Params.MIPGap``) only matter if those choices also work on the *other*
+MILP backends real users have. These tests assert that each available MILP-capable
+optlang interface produces the same reaction-set verdict as Gurobi on the toy models the
+unit tests use — so a regression in solver portability fails CI instead of being found
+months later on a user's machine.
+
+Solvers tested: every MILP-capable cobra/optlang interface that imports in this env
+(Gurobi, HiGHS via ``hybrid``, GLPK). Missing ones are skipped automatically. Genome-scale
+behaviour is measured separately by ``scripts/analyze_init_solvers.py`` (manual benchmark).
+"""
+from __future__ import annotations
+
+import importlib
+
+import cobra
+import pytest
+
+from raven_python.init import ftinit, prep_init_model, run_ftinit, run_init
+from raven_python.tasks import Task, check_tasks
+
+# Detect which MILP-capable optlang interfaces actually work; skip the rest.
+# We do a real import (not just find_spec) because optlang ships every backend's
+# module file but those that wrap third-party solvers (gurobi, cplex) only import
+# cleanly when the underlying solver is installed — find_spec would say "present"
+# and then we'd crash at fixture time on CI runners without Gurobi.
+_INTERFACES = {"gurobi": "gurobi_interface", "hybrid": "hybrid_interface", "glpk": "glpk_interface"}
+
+
+def _solver_available(modname: str) -> bool:
+    try:  # a real import: True only when ``optlang.<modname>`` actually loads
+        importlib.import_module(f"optlang.{modname}")
+    except ImportError:
+        return False
+    return True
+
+
+_AVAILABLE = [name for name, mod in _INTERFACES.items() if _solver_available(mod)]
+
+# Known upstream blocker: ``optlang.hybrid_interface.Configuration.clone()`` rejects
+# ``lp_method='primal'``. Marked strict so this flips red when optlang is fixed and
+# we should drop the marker. See docs/init_solver_benchmark.md.
+_XFAIL = {"hybrid": pytest.mark.xfail(
+    reason="optlang hybrid_interface.Configuration rejects lp_method='primal' (upstream)",
+    strict=True, raises=ValueError,
+)}
+
+
+def _param(name: str):
+    known_failure = _XFAIL.get(name)  # xfail marker for this solver, if one is registered
+    return pytest.param(name, marks=[] if known_failure is None else [known_failure], id=name)
+
+
+@pytest.fixture(params=list(map(_param, _AVAILABLE)))
+def solver(request):
+    """Name of one importable MILP interface; parametrised over ``_AVAILABLE``."""
+    return request.param
+
+
+# ----------------------------------------------------------------------- toy fixtures
+
+def _met(mid, comp="c"):
+    return cobra.Metabolite(mid, name=mid.partition("_")[0], compartment=comp)  # "A_c" -> "A"
+
+
+def _toy_init_model() -> cobra.Model:
+    """Linear toy EX_A → A → B → C → D (r1, r2 good; r3 bad), as used in test_init.py."""
+    A, B, C, D = (_met(x) for x in ("A_c", "B_c", "C_c", "D_c"))
+    specs = [("EX_A", (-1000, 1000), {A: -1}), ("r1", (0, 1000), {A: -1, B: 1}),
+             ("r2", (0, 1000), {B: -1, C: 1}), ("r3", (0, 1000), {C: -1, D: 1})]
+    toy = cobra.Model("toy")
+    toy.add_metabolites([A, B, C, D])
+    reactions = []
+    for rid, (lb, ub), stoich in specs:
+        reaction = cobra.Reaction(rid, lower_bound=lb, upper_bound=ub)
+        reaction.add_metabolites(stoich)
+        reactions.append(reaction)
+    toy.add_reactions(reactions)
+    return toy
+
+
+def _toy_ftinit_model() -> cobra.Model:
+    """Small ftINIT toy: A→B (r1), B→C (r2) and a parallel A→C (rbad), with EX_A/EX_C."""
+    A, B, C = (_met(x) for x in ("A_c", "B_c", "C_c"))
+    specs = [("EX_A", (-1000, 0), {A: -1}), ("EX_C", (0, 1000), {C: -1}),
+             ("r1", (0, 1000), {A: -1, B: 1}), ("r2", (0, 1000), {B: -1, C: 1}),
+             ("rbad", (0, 1000), {A: -1, C: 1})]
+    toy = cobra.Model("ftoy")
+    toy.add_metabolites([A, B, C])
+    reactions = []
+    for rid, (lb, ub), stoich in specs:
+        reaction = cobra.Reaction(rid, lower_bound=lb, upper_bound=ub)
+        reaction.add_metabolites(stoich)
+        reactions.append(reaction)
+    toy.add_reactions(reactions)
+    return toy
+
+
+# --------------------------------------------------------------------- tests
+
+def test_run_init_same_verdict(solver):
+    """Whatever the backend, run_init deletes negative-scored r3 and keeps EX_A, r1, r2."""
+    net = _toy_init_model()
+    net.solver = solver
+    out = run_init(net, {"r1": 1.0, "r2": 1.0, "r3": -1.0}, prod_weight=0.0, allow_excretion=True)
+    assert "r3" in out.deleted_reactions
+    assert {rxn.id for rxn in out.model.reactions} == {"EX_A", "r1", "r2"}
+
+
+def test_run_ftinit_same_verdict(solver):
+    """On every backend run_ftinit switches r1 and r2 on and leaves rbad off."""
+    net = _toy_ftinit_model()
+    net.solver = solver
+    out = run_ftinit(net, {"r1": 1.0, "r2": 1.0, "rbad": -1.0}, allow_excretion=True)
+    switched_on = set(out.on_reactions)
+    assert "rbad" not in switched_on and {"r1", "r2"} <= switched_on
+
+
+def test_check_tasks_works_per_solver(solver):
+    """check_tasks reports the A → C task as passed on every solver backend."""
+    net = _toy_ftinit_model()
+    net.solver = solver
+    make_c = Task(id="make_c", inputs=[("A[c]", 0.0, 1000.0)], outputs=[("C[c]", 1.0, 1.0)])
+    first_result = check_tasks(net, [make_c])[0]
+    assert first_result.passed
+
+
+def test_ftinit_pipeline_with_tasks(solver):
+    """prep_init_model + ftinit('1+1') run on every backend and keep the task feasible."""
+    net = _toy_ftinit_model()
+    net.solver = solver
+    make_c = Task(id="make_c", inputs=[("A[c]", 0.0, 1000.0)], outputs=[("C[c]", 1.0, 1.0)])
+    prepared = prep_init_model(net, [make_c])
+    extracted = ftinit(prepared, {"r1": 1.0, "r2": 1.0, "rbad": -1.0}, series="1+1")
+    # Functional check: the extracted model must still be able to perform the task.
+    assert check_tasks(extracted, [make_c])[0].passed
+
+
+def test_solver_param_plumbing(solver):
+    """mip_gap and time_limit are accepted by every backend and a result comes back."""
+    net = _toy_ftinit_model()
+    net.solver = solver
+    # Trivial problem: only checks that mip_gap / time_limit pass through without raising.
+    limits = {"mip_gap": 0.05, "time_limit": 60}
+    out = run_ftinit(net, {"r1": 1.0, "rbad": -1.0}, allow_excretion=True, **limits)
+    assert out.objective is not None
diff --git a/tests/test_init_taskfill.py b/tests/test_init_taskfill.py
new file mode 100644
index 0000000..c975f41
--- /dev/null
+++ b/tests/test_init_taskfill.py
@@ -0,0 +1,83 @@
+"""Phase 4d.4: task gap-filling (fill_tasks).
+
+Oracle: RAVEN tinitTests T0003. Remove the exchange reactions and create a gap by
+deleting R7 (e[c] -> e[s]); gap-filling against the full reference must add R7 back so
+the task 'make e[s] from a[s]' becomes feasible again.
+"""
+from tinit_oracles import make_test_model, make_test_task
+
+from raven_python.init import TaskFillResult, fill_tasks
+
+
+def _reference_without_exchanges():
+    """testModel with the exchange reactions (R1, R8) removed — the gap-fill template."""
+    ref = make_test_model()
+    ref.remove_reactions(["R1", "R8"], remove_orphans=False)
+    return ref
+
+
+def test_fills_the_gap_with_r7():
+    ref = _reference_without_exchanges()
+    gapped = ref.copy()
+    gapped.remove_reactions(["R7"], remove_orphans=False)  # the gap
+    res = fill_tasks(gapped, ref, [make_test_task()])
+    assert isinstance(res, TaskFillResult)
+    assert res.added_reactions == ["R7"]
+    assert "R7" in {r.id for r in res.model.reactions}
+    assert not res.failed_tasks
+
+
+def test_no_fill_when_already_feasible():
+    """A model that can already do the task gets no additions."""
+    ref = _reference_without_exchanges()
+    res = fill_tasks(ref.copy(), ref, [make_test_task()])
+    assert res.added_reactions == []
+
+
+def test_should_fail_tasks_ignored():
+    from raven_python.tasks import Task
+
+    ref = _reference_without_exchanges()
+    gapped = ref.copy()
+    gapped.remove_reactions(["R7"], remove_orphans=False)
+    sf = Task(id="sf", should_fail=True, outputs=[("e[s]", 1.0, 1.0)])
+    res = fill_tasks(gapped, ref, [sf])
+    assert res.added_reactions == []  # should_fail task drives no gap-filling
+
+
+def test_open_exchange_does_not_short_circuit_gapfill():
+    """Boundaries are closed during gap-filling, so an open exchange can't fake feasibility.
+
+    Give the gapped model an open exchange on e[s]; without closing boundaries the task
+    'produce e[s]' would look feasible (free secretion) and R7 would never be added.
+    """
+    import cobra
+
+    ref = _reference_without_exchanges()
+    gapped = ref.copy()
+    gapped.remove_reactions(["R7"], remove_orphans=False)
+    ex_es = cobra.Reaction("EX_es", lower_bound=-1000, upper_bound=1000)
+    ex_es.add_metabolites({gapped.metabolites.es: -1})
+    gapped.add_reactions([ex_es])  # open exchange that must be ignored
+    res = fill_tasks(gapped, ref, [make_test_task()])
+    assert "R7" in res.added_reactions  # gap still detected and filled
+
+
+def test_prefers_cheaper_reactions_by_score():
+    """When two candidates can fill a gap, the higher-scored (cheaper) one is chosen.
+
+    Build a gap that R7 (e[c]->e[s]) OR an alternative ALT (e[c]->e[s]) can fill; give
+    ALT a much better score so it is preferred.
+    """
+    import cobra
+
+    ref = _reference_without_exchanges()
+    alt = cobra.Reaction("ALT", lower_bound=0, upper_bound=1000)
+    alt.add_metabolites({ref.metabolites.ec: -1, ref.metabolites.es: 1})  # same as R7
+    alt.gene_reaction_rule = "gALT"
+    ref.add_reactions([alt])
+    gapped = ref.copy()
+    gapped.remove_reactions(["R7", "ALT"], remove_orphans=False)
+    # ALT scored high (cost low), R7 scored low (cost high) → ALT chosen.
+    res = fill_tasks(gapped, ref, [make_test_task()], rxn_scores={"ALT": 5.0, "R7": -3.0})
+    assert res.added_reactions == ["ALT"]
diff --git a/tests/test_io_excel.py b/tests/test_io_excel.py
new file mode 100644
index 0000000..12434ef
--- /dev/null
+++ b/tests/test_io_excel.py
@@ -0,0 +1,111 @@
+"""Tests for raven_python.io.excel (exportToExcelFormat port, export only)."""
+import cobra
+import pytest
+
+openpyxl = pytest.importorskip("openpyxl")
+
+from raven_python.io import export_to_excel
+from raven_python.manipulation import add_reactions_from_equations
+
+
+@pytest.fixture
+def model():
+    m = cobra.Model("yeastGEM")
+    m.name = "Yeast"
+    m.compartments = {"c": "cytoplasm"}
+    m.notes["metaData"] = {"taxonomy": "taxonomy/559292", "defaultLB": "-1000"}
+    m.add_metabolites(
+        [
+            cobra.Metabolite("atp_c", name="ATP", formula="C10H16N5O13P3", charge=-4, compartment="c"),
+            cobra.Metabolite("adp_c", name="ADP", compartment="c"),
+        ]
+    )
+    m.metabolites.atp_c.annotation = {"kegg.compound": ["C00002"], "smiles": ["C1=NC"]}
+    m.metabolites.atp_c.notes = {"inchis": "InChI=1S/X"}
+    add_reactions_from_equations(
+        m,
+        [{"id": "R1", "equation": "atp_c <=> adp_c", "name": "rxn one",
+          "gene_reaction_rule": "G1", "subsystem": "glycolysis"}],
+    )
+    r = m.reactions.R1
+    r.annotation = {"ec-code": ["1.1.1.1"], "kegg.reaction": ["R00001"]}
+    r.notes = {"confidence_score": 2, "note": "a note", "references": "PMID:1"}
+    r.objective_coefficient = 1
+    return m
+
+
+def _wb(path):  # helper: load the workbook stored at `path` with openpyxl
+    return openpyxl.load_workbook(path)
+
+
+def test_sheets_present(model, tmp_path):
+    out = tmp_path / "m.xlsx"
+    export_to_excel(model, out)
+    wb = _wb(out)
+    assert set(wb.sheetnames) == {"RXNS", "METS", "COMPS", "GENES", "MODEL"}
+
+
+def test_rxns_sheet(model, tmp_path):
+    out = tmp_path / "m.xlsx"
+    export_to_excel(model, out)
+    ws = _wb(out)["RXNS"]
+    header = [c.value for c in ws[1]]
+    row = {header[i]: c.value for i, c in enumerate(ws[2])}
+    assert row["ID"] == "R1"
+    assert row["NAME"] == "rxn one"
+    assert "ATP[c]" in row["EQUATION"] and "<=>" in row["EQUATION"]
+    assert row["EC-NUMBER"] == "1.1.1.1"
+    assert row["GENE ASSOCIATION"] == "G1"
+    assert row["SUBSYSTEM"] == "glycolysis"
+    assert row["OBJECTIVE"] == 1
+    assert row["CONFIDENCE SCORE"] == 2
+    assert row["NOTE"] == "a note"
+    assert row["MIRIAM"] == "kegg.reaction/R00001"  # ec-code excluded (own column)
+
+
+def test_mets_sheet(model, tmp_path):
+    out = tmp_path / "m.xlsx"
+    export_to_excel(model, out)
+    ws = _wb(out)["METS"]
+    header = [c.value for c in ws[1]]
+    rows = {
+        r[header.index("REPLACEMENT ID")].value: {header[i]: c.value for i, c in enumerate(r)}
+        for r in ws.iter_rows(min_row=2)
+    }
+    atp = rows["atp_c"]
+    assert atp["ID"] == "ATP[c]"
+    assert atp["NAME"] == "ATP"
+    assert atp["InChI"] == "InChI=1S/X"
+    assert atp["COMPOSITION"] is None  # suppressed when InChI present
+    assert atp["CHARGE"] == -4
+    assert atp["MIRIAM"] == "kegg.compound/C00002"  # smiles excluded
+
+
+def test_model_sheet(model, tmp_path):
+    out = tmp_path / "m.xlsx"
+    export_to_excel(model, out)
+    ws = _wb(out)["MODEL"]
+    header = [c.value for c in ws[1]]
+    row = {header[i]: c.value for i, c in enumerate(ws[2])}
+    assert row["ID"] == "yeastGEM"
+    assert row["NAME"] == "Yeast"
+    assert row["TAXONOMY"] == "taxonomy/559292"
+    assert row["DEFAULT LOWER"] == "-1000"
+
+
+def test_genes_sheet(model, tmp_path):
+    out = tmp_path / "m.xlsx"
+    export_to_excel(model, out)
+    ws = _wb(out)["GENES"]
+    header = [c.value for c in ws[1]]
+    row = {header[i]: c.value for i, c in enumerate(ws[2])}
+    assert row["NAME"] == "G1"
+
+
+def test_no_genes_skips_sheet(tmp_path):
+    m = cobra.Model("t")
+    m.add_metabolites([cobra.Metabolite("a_c", compartment="c")])
+    add_reactions_from_equations(m, [{"id": "R1", "equation": "a_c -->"}])
+    out = tmp_path / "m.xlsx"
+    export_to_excel(m, out)
+    assert "GENES" not in _wb(out).sheetnames
diff --git a/tests/test_io_git.py b/tests/test_io_git.py
new file mode 100644
index 0000000..28881dc
--- /dev/null
+++ b/tests/test_io_git.py
@@ -0,0 +1,69 @@
+"""Tests for raven_python.io.git (exportForGit port)."""
+import cobra
+import pytest
+
+from raven_python.io import export_for_git
+from raven_python.manipulation import add_reactions_from_equations
+
+
+@pytest.fixture
+def model():
+    m = cobra.Model("yeastGEM")
+    m.compartments = {"c": "cytoplasm"}
+    m.add_metabolites(
+        [cobra.Metabolite("atp_c", name="ATP", compartment="c"),
+         cobra.Metabolite("adp_c", name="ADP", compartment="c")]
+    )
+    add_reactions_from_equations(m, [{"id": "R1", "equation": "atp_c <=> adp_c"}])
+    return m
+
+
+def test_standard_gem_layout(model, tmp_path):
+    root = export_for_git(model, tmp_path, prefix="yeast", formats=("yml", "xml", "mat", "xlsx", "txt"))
+    assert root == tmp_path / "model"
+    assert (root / "yml" / "yeast.yml").exists()
+    assert (root / "xml" / "yeast.xml").exists()
+    assert (root / "mat" / "yeast.mat").exists()
+    assert (root / "xlsx" / "yeast.xlsx").exists()
+    assert (root / "txt" / "yeast.txt").exists()
+    assert (root / "dependencies.txt").exists()
+
+
+def test_dependencies_file(model, tmp_path):
+    root = export_for_git(model, tmp_path, formats=("yml",))
+    deps = (root / "dependencies.txt").read_text()
+    assert "python\t" in deps
+    assert "cobra\t" in deps
+    assert "raven_python\t" in deps
+
+
+def test_flat_layout(model, tmp_path):
+    root = export_for_git(model, tmp_path, formats=("yml",), sub_dirs=False)
+    assert root == tmp_path
+    assert (tmp_path / "model.yml").exists()
+
+
+def test_subset_of_formats(model, tmp_path):
+    root = export_for_git(model, tmp_path, formats=("yml", "xml"))
+    assert (root / "yml" / "model.yml").exists()
+    assert not (root / "mat").exists()
+    assert not (root / "xlsx").exists()
+
+
+def test_does_not_mutate_model(model, tmp_path):
+    order_before = [r.id for r in model.reactions]
+    export_for_git(model, tmp_path, formats=("yml",))
+    assert [r.id for r in model.reactions] == order_before
+
+
+def test_txt_table_content(model, tmp_path):
+    root = export_for_git(model, tmp_path, formats=("txt",))
+    txt = (root / "txt" / "model.txt").read_text()
+    assert txt.splitlines()[0].startswith("Rxn name\t")
+    assert "R1" in txt
+    assert "ATP[c]" in txt
+
+
+def test_bad_format(model, tmp_path):  # "json" in formats must raise ValueError("Unknown format...")
+    with pytest.raises(ValueError, match="Unknown format"):
+        export_for_git(model, tmp_path, formats=("yml", "json"))
diff --git a/tests/test_io_sif.py b/tests/test_io_sif.py
new file mode 100644
index 0000000..d50ad98
--- /dev/null
+++ b/tests/test_io_sif.py
@@ -0,0 +1,82 @@
+"""Tests for raven_python.io.sif (exportModelToSIF port)."""
+import cobra
+import pytest
+
+from raven_python.io import export_model_to_sif
+from raven_python.manipulation import add_reactions_from_equations
+
+
+@pytest.fixture
+def model():
+    m = cobra.Model("t")
+    m.add_metabolites([cobra.Metabolite(x, compartment="c") for x in ("a", "b", "c")])
+    add_reactions_from_equations(
+        m,
+        [
+            {"id": "R1", "equation": "a --> b"},
+            {"id": "R2", "equation": "b --> c"},
+        ],
+    )
+    return m
+
+
+def _lines(path):
+    return [ln.split("\t") for ln in path.read_text().splitlines()]
+
+
+def test_reaction_compound(model, tmp_path):
+    out = tmp_path / "g.sif"
+    export_model_to_sif(model, out, "rc")
+    rows = {r[0]: (r[1], set(r[2:])) for r in _lines(out)}
+    assert rows["R1"] == ("rc", {"a", "b"})
+    assert rows["R2"] == ("rc", {"b", "c"})
+
+
+def test_reaction_reaction(model, tmp_path):
+    out = tmp_path / "g.sif"
+    export_model_to_sif(model, out, "rr")
+    rows = {r[0]: set(r[2:]) for r in _lines(out)}
+    # R1 and R2 share metabolite b
+    assert rows["R1"] == {"R2"}
+    assert rows["R2"] == {"R1"}
+
+
+def test_compound_compound(model, tmp_path):
+    out = tmp_path / "g.sif"
+    export_model_to_sif(model, out, "cc")
+    rows = {r[0]: set(r[2:]) for r in _lines(out)}
+    # a is a substrate of R1 (a->b): a links to product b
+    assert "b" in rows.get("a", set())
+    # b is substrate of R2 (b->c): b links to c
+    assert "c" in rows.get("b", set())
+
+
+def test_custom_labels(model, tmp_path):
+    out = tmp_path / "g.sif"
+    export_model_to_sif(model, out, "rc", reaction_labels={"R1": "Reaction1"})
+    sources = {r[0] for r in _lines(out)}
+    assert "Reaction1" in sources
+    assert "R1" not in sources
+
+
+def test_bad_graph_type(model, tmp_path):  # graph type "xx" must raise ValueError mentioning graph_type
+    with pytest.raises(ValueError, match="graph_type"):
+        export_model_to_sif(model, tmp_path / "g.sif", "xx")
+
+
+def test_cc_does_not_mutate_input(model, tmp_path):
+    n_before = len(model.reactions)
+    export_model_to_sif(model, tmp_path / "g.sif", "cc")
+    assert len(model.reactions) == n_before  # convert_to_irreversible ran on a copy
+
+
+# --- regression: label-map collision (known_issues.md B4) ------------------
+
+def test_collapsing_label_map_warns(model, tmp_path):
+    """A label map that sends two distinct ids to the same label silently merges
+    nodes during the target-side dedup. Now warns so the user sees it."""
+    with pytest.warns(UserWarning, match="multiple ids to the same label"):
+        export_model_to_sif(
+            model, tmp_path / "g.sif", "rc",
+            reaction_labels={"R1": "shared", "R2": "shared"},
+        )
diff --git a/tests/test_io_yaml.py b/tests/test_io_yaml.py
new file mode 100644
index 0000000..510af5f
--- /dev/null
+++ b/tests/test_io_yaml.py
@@ -0,0 +1,202 @@
+"""Tests for raven_python.io.yaml against the RAVEN fa281a1 (cobra-native !!omap) schema."""
+from pathlib import Path
+
+import cobra
+import pytest
+from cobra.io.yaml import yaml as cobra_yaml
+
+from raven_python.io import read_yaml_model, write_yaml_model
+
+# A model laid out exactly as RAVEN writeYAMLmodel (fa281a1) emits: cobra-native
+# structure, RAVEN-only fields as top-level per-entry keys, smiles/ec-code inside
+# the annotation block, metaData provenance-only, id/name/version top-level.
+RAVEN_DOC = {
+    "metabolites": [
+        {
+            "id": "s_0001",
+            "name": "ATP",
+            "compartment": "c",
+            "formula": "C10H16N5O13P3",
+            "charge": -4,
+            "inchis": "InChI=1S/CH4",
+            "deltaG": 12.5,
+            "notes": "a metabolite note",  # the tests read this back as notes["note"]
+            "metFrom": "KEGG",
+            "annotation": {"kegg.compound": ["C00002"], "smiles": ["C1=NC2"]},
+        },
+        {"id": "s_0002", "name": "ADP", "compartment": "c"},
+    ],
+    "reactions": [
+        {
+            "id": "R1",
+            "name": "rxn one",
+            "metabolites": {"s_0001": -1, "s_0002": 1},
+            "lower_bound": -1000.0,
+            "upper_bound": 1000.0,
+            "gene_reaction_rule": "G1",
+            "subsystem": "glycolysis",
+            "confidence_score": 2,
+            "references": "PMID:123",
+            "rxnFrom": "manual",
+            "notes": "a reaction note",  # the tests read this back as notes["note"]
+            "deltaG": -5.0,
+            "annotation": {"ec-code": ["1.1.1.1"]},
+        }
+    ],
+    "genes": [
+        {"id": "G1", "name": "gene one", "protein": "P12345", "annotation": {"uniprot": ["P12345"]}}
+    ],
+    "id": "testModel",
+    "name": "Test Model",
+    "compartments": {"c": "cytoplasm"},
+    "version": "1.0",  # the tests read this back as model.notes["version"]
+    "metaData": {"date": "2026-05-23", "taxonomy": "taxonomy/559292", "defaultLB": "-1000"},
+    "ec-rxns": [{"id": "R1", "kcat": 100.0}],  # extra section; read back under notes["_yaml_sections"]
+}
+
+
+@pytest.fixture
+def yaml_file(tmp_path) -> Path:
+    p = tmp_path / "model.yml"
+    with open(p, "w", encoding="utf-8") as fh:
+        cobra_yaml.dump(RAVEN_DOC, fh)
+    return p
+
+
+def test_standard_content(yaml_file):
+    model = read_yaml_model(yaml_file)
+    assert model.id == "testModel"
+    assert model.name == "Test Model"
+    assert {m.id for m in model.metabolites} == {"s_0001", "s_0002"}
+    r = model.reactions.get_by_id("R1")
+    assert r.bounds == (-1000.0, 1000.0)
+    assert r.subsystem == "glycolysis"
+    assert r.gene_reaction_rule == "G1"
+
+
+def test_annotation_owned_by_cobra(yaml_file):
+    # smiles / ec-code / miriam live in the annotation block (cobra reads them)
+    model = read_yaml_model(yaml_file)
+    assert model.metabolites.get_by_id("s_0001").annotation["smiles"] == ["C1=NC2"]
+    assert model.metabolites.get_by_id("s_0001").annotation["kegg.compound"] == ["C00002"]
+    assert model.reactions.get_by_id("R1").annotation["ec-code"] == ["1.1.1.1"]
+    assert model.genes.get_by_id("G1").annotation["uniprot"] == ["P12345"]
+
+
+def test_raven_only_fields_captured(yaml_file):
+    model = read_yaml_model(yaml_file)
+    a = model.metabolites.get_by_id("s_0001")
+    assert a.notes["inchis"] == "InChI=1S/CH4"
+    assert a.notes["deltaG"] == 12.5
+    assert a.notes["note"] == "a metabolite note"  # RAVEN metNotes string, no crash
+    assert a.notes["metFrom"] == "KEGG"
+    assert "smiles" not in a.notes  # smiles stays in annotation
+    r = model.reactions.get_by_id("R1")
+    assert r.notes["confidence_score"] == 2
+    assert r.notes["references"] == "PMID:123"
+    assert r.notes["rxnFrom"] == "manual"
+    assert r.notes["note"] == "a reaction note"
+    assert r.notes["deltaG"] == -5.0
+    assert model.genes.get_by_id("G1").notes["protein"] == "P12345"
+
+
+def test_model_level_extras(yaml_file):
+    model = read_yaml_model(yaml_file)
+    assert model.notes["metaData"]["taxonomy"] == "taxonomy/559292"
+    assert model.notes["version"] == "1.0"
+    assert model.notes["_yaml_sections"]["ec-rxns"][0]["kcat"] == 100.0
+
+
+def test_round_trip(yaml_file, tmp_path):
+    model = read_yaml_model(yaml_file)
+    out = tmp_path / "out.yml"
+    write_yaml_model(model, out)
+    reloaded = read_yaml_model(out)
+
+    assert reloaded.id == "testModel"
+    assert reloaded.notes["version"] == "1.0"
+    assert reloaded.notes["metaData"]["taxonomy"] == "taxonomy/559292"
+    a = reloaded.metabolites.get_by_id("s_0001")
+    assert a.notes["deltaG"] == 12.5
+    assert a.notes["note"] == "a metabolite note"
+    assert a.annotation["smiles"] == ["C1=NC2"]
+    r = reloaded.reactions.get_by_id("R1")
+    assert r.notes["confidence_score"] == 2
+    assert reloaded.genes.get_by_id("G1").notes["protein"] == "P12345"
+    assert reloaded.notes["_yaml_sections"]["ec-rxns"][0]["id"] == "R1"
+
+
+def test_extra_notes_not_dropped_when_free_text_note_present(yaml_file, tmp_path):
+    """An entry with both a RAVEN free-text note and an extra note keeps both on write."""
+    model = read_yaml_model(yaml_file)
+    a = model.metabolites.get_by_id("s_0001")
+    a.notes["note"] = "free text"
+    a.notes["custom"] = "extra value"  # a non-RAVEN note that must not be silently lost
+    out = tmp_path / "out.yml"
+    write_yaml_model(model, out)
+    text = out.read_text()
+    assert "extra value" in text  # the leftover note survives serialization
+
+
+def test_gzipped_round_trip(yaml_file, tmp_path):
+    # A .yml.gz path is transparently gzipped on write and read.
+    model = read_yaml_model(yaml_file)
+    out = tmp_path / "out.yml.gz"
+    write_yaml_model(model, out)
+    assert out.read_bytes()[:2] == b"\x1f\x8b"  # gzip magic
+    reloaded = read_yaml_model(out)
+    assert reloaded.id == "testModel"
+    assert {m.id for m in reloaded.metabolites} == {"s_0001", "s_0002"}
+
+
+def test_output_is_cobra_readable(yaml_file, tmp_path):
+    # The written file must load with stock cobra (it's cobra's native format).
+    model = read_yaml_model(yaml_file)
+    out = tmp_path / "out.yml"
+    write_yaml_model(model, out)
+    cobra_model = cobra.io.load_yaml_model(str(out))
+    assert cobra_model.id == "testModel"
+    assert {m.id for m in cobra_model.metabolites} == {"s_0001", "s_0002"}
+    # RAVEN-only fields land in cobra notes; smiles in annotation
+    assert cobra_model.metabolites.get_by_id("s_0001").annotation["smiles"] == ["C1=NC2"]
+
+
+def test_write_emits_raven_top_level_keys(yaml_file, tmp_path):
+    model = read_yaml_model(yaml_file)
+    out = tmp_path / "out.yml"
+    write_yaml_model(model, out)
+    text = out.read_text()
+    # RAVEN-only fields are lifted back to top-level entry keys, not buried in notes
+    assert "inchis:" in text
+    assert "deltaG:" in text
+    assert "confidence_score:" in text
+    assert "metaData:" in text
+
+
+def test_legacy_id_in_metadata(tmp_path):
+    # Older RAVEN files nest id/name under metaData and have no top-level id.
+    legacy = {
+        "metabolites": [{"id": "a_c", "name": "A", "compartment": "c"}],
+        "reactions": [],
+        "genes": [],
+        "compartments": {"c": "cyt"},
+        "metaData": {"id": "legacyModel", "name": "Legacy"},
+    }
+    p = tmp_path / "legacy.yml"
+    with open(p, "w", encoding="utf-8") as fh:
+        cobra_yaml.dump(legacy, fh)
+    model = read_yaml_model(p)
+    assert model.id == "legacyModel"
+    assert model.name == "Legacy"
+
+
+# Optional smoke test on a real model file. NOTE(review): developer-local absolute path.
+_YEAST = Path("/home/eduardk/github/GECKO/tutorials/full_ecModel/models/yeast-GEM.yml")
+
+
+@pytest.mark.skipif(not _YEAST.exists(), reason="real yeast-GEM.yml not available")
+def test_real_yeast_gem_loads():
+    model = read_yaml_model(_YEAST)
+    assert len(model.reactions) > 1000
+    # legacy file: identity comes from metaData (original note); only a non-empty id is checked
+    assert model.id
diff --git a/tests/test_localization.py b/tests/test_localization.py
new file mode 100644
index 0000000..d177db5
--- /dev/null
+++ b/tests/test_localization.py
@@ -0,0 +1,227 @@
+"""Tests for raven_python.localization — predictor loaders + the MILP + apply (Phase 7)."""
+from __future__ import annotations
+
+from textwrap import dedent
+
+import cobra
+import pandas as pd
+import pytest
+
+from raven_python.localization import (
+    LocalizationProposal,
+    LocalizationResult,
+    LocalizationScores,
+    apply_localization,
+    load_deeploc,
+    load_wolfpsort,
+    predict_localization,
+)
+
+# --------------------------------------------------------------------- loaders
+
+def test_load_wolfpsort_basic(tmp_path):
+    p = tmp_path / "wolf.txt"
+    p.write_text(dedent("""\
+        # header comment
+        Gene1 cyto 13, nucl 7, mito 4
+        Gene2: treating 9 X's as Glycines
+        Gene3 mito 20, cyto 2
+    """))
+    s = load_wolfpsort(p)
+    assert "Gene1" in s.genes
+    assert "Gene2" not in s.genes      # the 'treating' line is skipped
+    assert "Gene3" in s.genes
+    # row-normalised to max=1:
+    assert s.df.loc["Gene1", "cyto"] == pytest.approx(1.0)   # 13/13
+    assert s.df.loc["Gene1", "nucl"] == pytest.approx(7 / 13)
+    assert s.df.loc["Gene3", "mito"] == pytest.approx(1.0)
+    assert s.df.loc["Gene3", "cyto"] == pytest.approx(0.1)
+
+
+def test_load_deeploc_csv(tmp_path):
+    p = tmp_path / "deeploc.csv"
+    p.write_text(dedent("""\
+        Protein_ID,Localizations,Signals,Cytoplasm,Nucleus,Mitochondrion
+        G1,Cytoplasm,,0.8,0.1,0.05
+        G2,Mitochondrion,SP,0.05,0.15,0.9
+    """))
+    s = load_deeploc(p)
+    assert set(s.compartments) == {"Cytoplasm", "Nucleus", "Mitochondrion"}
+    # row-max → 1.0
+    assert s.df.loc["G1", "Cytoplasm"] == pytest.approx(1.0)
+    assert s.df.loc["G2", "Mitochondrion"] == pytest.approx(1.0)
+
+
+def test_localization_scores_with_compartments_rename():
+    df = pd.DataFrame({"cyto": [1.0], "mito": [0.2]}, index=pd.Index(["g1"], name="gene_id"))
+    s = LocalizationScores(df).with_compartments({"cyto": "c", "mito": "m"})
+    assert list(s.compartments) == ["c", "m"]
+
+
+# ----------------------------------------------------------------- predict (toy)
+
+def _toy_two_compartment_model() -> cobra.Model:
+    """Single-compartment draft (everything in 'c'):
+
+    A_c -(r1)-> B_c -(r2)-> C_c        (r2 should move to 'm' per scores below)
+    Boundary EX_A imports A; EX_C drains C.
+    """
+    m = cobra.Model("toy")
+    A, B, C = (cobra.Metabolite(x + "_c", name=x, compartment="c") for x in "ABC")
+    m.add_metabolites([A, B, C])
+
+    def rxn(rid, lb, ub, mets, gpr=None):
+        r = cobra.Reaction(rid, lower_bound=lb, upper_bound=ub)
+        r.add_metabolites(mets)
+        if gpr:
+            r.gene_reaction_rule = gpr
+        return r
+    m.add_reactions([rxn("EX_A", -1000, 0, {A: -1}),
+                     rxn("EX_C", 0, 1000, {C: -1}),
+                     rxn("r1", 0, 1000, {A: -1, B: 1}, "g1"),
+                     rxn("r2", 0, 1000, {B: -1, C: 1}, "g2")])
+    return m
+
+
+def test_predict_empty_relocate_set_is_no_op():
+    """An empty relocate set short-circuits to an empty proposal."""
+    m = _toy_two_compartment_model()
+    scores = LocalizationScores(pd.DataFrame(
+        {"c": [1.0, 0.0], "m": [0.0, 1.0]}, index=pd.Index(["g1", "g2"], name="gene_id")))
+    res = predict_localization(m, scores, reactions_to_relocate=[], apply=False)
+    assert isinstance(res, LocalizationProposal)
+    assert res.moved.empty
+
+
+def test_predict_places_single_reaction():
+    """Pass r2 in the relocate set; it goes to 'm' per scores."""
+    m = _toy_two_compartment_model()
+    scores = LocalizationScores(pd.DataFrame(
+        {"c": [1.0, 0.0], "m": [0.0, 1.0]}, index=pd.Index(["g1", "g2"], name="gene_id")))
+    res = predict_localization(m, scores, ["r2"], default_compartment="c", apply=False,
+                                transport_cost=0.1)
+    assert isinstance(res, LocalizationProposal)
+    assert set(res.moved["rxn_id"]) == {"r2"}
+    assert res.moved.iloc[0]["to_compartment"] == "m"
+
+
+def test_predict_apply_creates_compartment_metabolites_and_transports():
+    """apply=True should mutate the (copy) model: r2 in m, and B/C transports added."""
+    m = _toy_two_compartment_model()
+    scores = LocalizationScores(pd.DataFrame(
+        {"c": [1.0, 0.0], "m": [0.0, 1.0]}, index=pd.Index(["g1", "g2"], name="gene_id")))
+    res = predict_localization(m, scores, ["r2"], default_compartment="c", apply=True,
+                                transport_cost=0.05)
+    assert isinstance(res, LocalizationResult)
+    out = res.model
+    r2 = out.reactions.r2
+    assert {mt.compartment for mt in r2.metabolites} == {"m"}   # both substrates now in m
+    # B_m and C_m metabolite copies must exist:
+    assert "B_m" in out.metabolites and "C_m" in out.metabolites
+    # Transports tr_B_m and tr_C_m must be added (default c ↔ m):
+    transport_ids = {t.id for t in res.added_transports}
+    assert "tr_B_m" in transport_ids
+    assert "tr_C_m" in transport_ids
+    # Original model untouched (we copied).
+    assert m.reactions.r2.metabolites != r2.metabolites
+    assert "B_m" not in m.metabolites
+
+
+def test_predict_unplaced_reaction_when_no_scored_gene():
+    """A relocate-set reaction whose genes are all absent from scores is reported, not crashed."""
+    m = _toy_two_compartment_model()
+    # Only g1 has scores; g2 (r2's gene) is absent → r2 is unplaceable.
+    scores = LocalizationScores(pd.DataFrame(
+        {"c": [1.0], "m": [0.0]}, index=pd.Index(["g1"], name="gene_id")))
+    res = predict_localization(m, scores, ["r2"], apply=False)
+    assert isinstance(res, LocalizationProposal)
+    assert "r2" in res.unplaced_reactions
+    assert res.moved.empty   # nothing actually placed
+
+
+def test_predict_boundary_reactions_always_pinned():
+    """Boundary reactions in the relocate set are silently filtered out."""
+    m = _toy_two_compartment_model()
+    scores = LocalizationScores(pd.DataFrame(
+        {"c": [1.0, 0.0], "m": [0.0, 1.0]}, index=pd.Index(["g1", "g2"], name="gene_id")))
+    res = predict_localization(m, scores, ["EX_A", "EX_C", "r2"], apply=False,
+                                transport_cost=0.1)
+    # Only r2 should appear in the proposal — boundaries dropped.
+    assert set(res.moved["rxn_id"]) == {"r2"}
+
+
+def test_predict_default_compartment_validated():
+    m = _toy_two_compartment_model()
+    scores = LocalizationScores(pd.DataFrame(
+        {"c": [1.0], "m": [0.0]}, index=pd.Index(["g2"], name="gene_id")))
+    with pytest.raises(ValueError, match="default_compartment"):
+        predict_localization(m, scores, ["r2"], default_compartment="x", apply=False)
+
+
+def test_apply_localization_idempotent_on_empty_proposal():
+    """An empty proposal (no moves, no transports) shouldn't change the model."""
+    m = _toy_two_compartment_model()
+    empty = LocalizationProposal(
+        moved=pd.DataFrame(columns=["rxn_id", "from_compartment", "to_compartment"]),
+        added_transports=pd.DataFrame(columns=["met_id", "compartment"]),
+        gene_compartments={})
+    out, added = apply_localization(m, empty)
+    assert len(out.reactions) == len(m.reactions)
+    assert added == []
+
+
+# ----------------------------------------- multi-compartment scoring (NEW)
+
+def test_predict_multi_compartment_when_secondary_score_beats_penalty():
+    """Dual-localised gene: secondary compartment score 0.8 > penalty 0.3 → gene lands in
+    both compartments. Two reactions sharing one gene each placed in their best
+    compartment without contradicting the gene assignment."""
+    m = cobra.Model("dual")
+    A_c = cobra.Metabolite("A_c", compartment="c")
+    B_c = cobra.Metabolite("B_c", compartment="c")
+    m.add_metabolites([A_c, B_c])
+    r1 = cobra.Reaction("r1", lower_bound=0, upper_bound=1000)
+    r1.add_metabolites({A_c: -1, B_c: 1})
+    r1.gene_reaction_rule = "g_dual"
+    r2 = cobra.Reaction("r2", lower_bound=0, upper_bound=1000)
+    r2.add_metabolites({A_c: -1, B_c: 1})
+    r2.gene_reaction_rule = "g_dual"
+    m.add_reactions([r1, r2])
+    # g_dual scores: c=1.0 (primary), m=0.8 (secondary). Penalty 0.3 — secondary worth it.
+    scores = LocalizationScores(pd.DataFrame(
+        {"c": [1.0], "m": [0.8]}, index=pd.Index(["g_dual"], name="gene_id")))
+    res = predict_localization(m, scores, ["r2"], default_compartment="c", apply=False,
+                                transport_cost=0.0, multi_compartment_penalty=0.3)
+    # The gene should land in BOTH c and m (primary free + 0.8 - 0.3 > 0 for secondary).
+    assert set(res.gene_compartments["g_dual"]) == {"c", "m"}
+
+
+def test_predict_mono_when_secondary_score_below_penalty():
+    """Same as above but penalty 0.9 > secondary score 0.8 → gene mono-localises (c only)."""
+    m = cobra.Model("mono")
+    A_c = cobra.Metabolite("A_c", compartment="c")
+    B_c = cobra.Metabolite("B_c", compartment="c")
+    m.add_metabolites([A_c, B_c])
+    r2 = cobra.Reaction("r2", lower_bound=0, upper_bound=1000)
+    r2.add_metabolites({A_c: -1, B_c: 1})
+    r2.gene_reaction_rule = "g_dual"
+    m.add_reactions([r2])
+    scores = LocalizationScores(pd.DataFrame(
+        {"c": [1.0], "m": [0.8]}, index=pd.Index(["g_dual"], name="gene_id")))
+    res = predict_localization(m, scores, ["r2"], default_compartment="c", apply=False,
+                                transport_cost=0.0, multi_compartment_penalty=0.9)
+    # Penalty exceeds secondary score → only the primary compartment.
+    assert res.gene_compartments["g_dual"] == ["c"]
+
+
+def test_predict_high_penalty_forces_mono_localisation():
+    """Very high penalty effectively bans extra compartments."""
+    m = _toy_two_compartment_model()
+    scores = LocalizationScores(pd.DataFrame(
+        {"c": [0.4, 0.3], "m": [0.5, 0.6]},
+        index=pd.Index(["g1", "g2"], name="gene_id")))
+    res = predict_localization(m, scores, ["r1", "r2"], default_compartment="c", apply=False,
+                                transport_cost=0.0, multi_compartment_penalty=1000.0)
+    # With penalty so high, every gene gets exactly one compartment.
+    for g, comps in res.gene_compartments.items():
+        assert len(comps) == 1, f"{g} landed in {comps} with prohibitive penalty"
diff --git a/tests/test_manipulation_add.py b/tests/test_manipulation_add.py
new file mode 100644
index 0000000..2a3a9d3
--- /dev/null
+++ b/tests/test_manipulation_add.py
@@ -0,0 +1,278 @@
+"""Tests for raven_python.manipulation.add (addRxns port)."""
+import cobra
+import pytest
+
+from raven_python.manipulation import add_reactions_from_equations
+from raven_python.utils.parse import parse_name_comp
+
+
+@pytest.fixture
+def model():
+    """Fresh cobra model "t" with ATP, H2O, ADP and phosphate in compartment "c"."""
+    names_by_id = {"atp_c": "ATP", "h2o_c": "H2O", "adp_c": "ADP", "pi_c": "phosphate"}
+    # Dicts keep insertion order, so the metabolites are created in the order listed.
+    mets = [
+        cobra.Metabolite(met_id, name=met_name, compartment="c")
+        for met_id, met_name in names_by_id.items()
+    ]
+    fresh = cobra.Model("t")
+    fresh.add_metabolites(mets)
+    return fresh
+
+
+# --- parse_name_comp -------------------------------------------------------
+
+@pytest.mark.parametrize(
+    "token,expected",
+    [
+        ("ATP[c]", ("ATP", "c")),  # name followed by a bracketed compartment
+        ("ATP", ("ATP", None)),  # no brackets: compartment is reported as None
+        ("  ATP[c] ", ("ATP", "c")),  # surrounding whitespace is not part of the name
+        ("weird[name][m]", ("weird[name]", "m")),  # only the final [..] is the compartment
+    ],
+)
+def test_parse_name_comp(token, expected):
+    assert parse_name_comp(token) == expected
+
+
+# --- id mode (eqnType 1) ---------------------------------------------------
+
+def test_add_by_id_basic_and_reversibility(model):
+    """A ``<=>`` equation in id mode yields reversible R1 with unit coefficients."""
+    spec = {"id": "R1", "equation": "atp_c + h2o_c <=> adp_c + pi_c"}
+    (rxn,) = add_reactions_from_equations(model, [spec])
+    assert rxn.id == "R1"
+    assert rxn.reversibility is True
+    expected = {
+        "atp_c": -1.0, "h2o_c": -1.0,  # left-hand side
+        "adp_c": 1.0, "pi_c": 1.0,  # right-hand side
+    }
+    observed = {met.id: rxn.get_coefficient(met.id) for met in rxn.metabolites}
+    assert observed == expected
+
+
+def test_irreversible_arrows(model):
+    """Both ``-->`` and ``=>`` give reactions with lower bound 0 that are not reversible."""
+    # One spec per irreversible arrow spelling under test.
+    specs = [
+        {"id": "R1", "equation": "atp_c --> adp_c"},
+        {"id": "R2", "equation": "atp_c => adp_c"},
+    ]
+    added = add_reactions_from_equations(model, specs)
+    for rxn in added:
+        assert rxn.lower_bound == 0.0
+        assert rxn.reversibility is False
+
+
+def test_coefficients(model):
+    (rxn,) = add_reactions_from_equations(
+        model, [{"id": "R1", "equation": "2 atp_c + 1.5 h2o_c --> adp_c"}]
+    )
+    assert rxn.get_coefficient("atp_c") == -2.0
+    assert rxn.get_coefficient("h2o_c") == -1.5
+
+
+def test_id_mode_creates_new_met_in_compartment(model):
+    add_reactions_from_equations(
+        model,
+        [{"id": "R1", "equation": "atp_c --> amp_c"}],
+        compartment="c",
+    )
+    assert "amp_c" in model.metabolites
+    assert model.metabolites.get_by_id("amp_c").compartment == "c"
+
+
+def test_id_mode_new_met_without_compartment_errors(model):
+    with pytest.raises(ValueError, match="no compartment"):
+        add_reactions_from_equations(model, [{"id": "R1", "equation": "atp_c --> amp_c"}])
+
+
+# --- name mode (eqnType 2) -------------------------------------------------
+
+def test_name_mode_matches_existing_by_name(model):
+    (rxn,) = add_reactions_from_equations(
+        model,
+        [{"id": "R1", "equation": "ATP + H2O <=> ADP + phosphate"}],
+        mets_by="name",
+        compartment="c",
+    )
+    # resolved to the existing _c metabolites, not new ones
+    assert {m.id for m in rxn.metabolites} == {"atp_c", "h2o_c", "adp_c", "pi_c"}
+    assert len(model.metabolites) == 4
+
+
+def test_name_mode_creates_new_met_with_auto_id(model):
+    """An unknown name (AMP) becomes one new metabolite with id "m1" in compartment "c"."""
+    add_reactions_from_equations(
+        model, [{"id": "R1", "equation": "ATP --> AMP"}], mets_by="name", compartment="c"
+    )
+    matches = [met for met in model.metabolites if met.name == "AMP"]
+    # Exactly one AMP must exist before it is unpacked below.
+    assert len(matches) == 1
+    (amp,) = matches
+    assert amp.id == "m1"
+    assert amp.compartment == "c"
+
+
+def test_name_mode_requires_compartment(model):
+    with pytest.raises(ValueError, match="needs a compartment"):
+        add_reactions_from_equations(
+            model, [{"id": "R1", "equation": "ATP --> ADP"}], mets_by="name"
+        )
+
+
+# --- name[comp] mode (eqnType 3) -------------------------------------------
+
+def test_name_comp_syntax(model):
+    model.add_metabolites([cobra.Metabolite("atp_m", name="ATP", compartment="m")])
+    (rxn,) = add_reactions_from_equations(
+        model,
+        [{"id": "R1", "equation": "ATP[c] --> ATP[m]"}],
+        mets_by="name",
+        compartment="c",
+    )
+    # matched ATP in two different compartments by name[comp]
+    assert {m.id for m in rxn.metabolites} == {"atp_c", "atp_m"}
+
+
+# --- genes -----------------------------------------------------------------
+
+def test_gene_rule_auto_creates_genes(model):
+    (rxn,) = add_reactions_from_equations(
+        model,
+        [{"id": "R1", "equation": "atp_c --> adp_c", "gene_reaction_rule": "G1 and G2"}],
+    )
+    assert {g.id for g in rxn.genes} == {"G1", "G2"}
+    assert {g.id for g in model.genes} == {"G1", "G2"}
+
+
+def test_strict_genes_errors_on_unknown(model):
+    with pytest.raises(ValueError, match="genes not in the model"):
+        add_reactions_from_equations(
+            model,
+            [{"id": "R1", "equation": "atp_c --> adp_c", "gene_reaction_rule": "G1"}],
+            allow_new_genes=False,
+        )
+
+
+def test_strict_genes_ok_when_present(model):
+    model.genes.append(cobra.core.gene.Gene("G1"))
+    (rxn,) = add_reactions_from_equations(
+        model,
+        [{"id": "R1", "equation": "atp_c --> adp_c", "gene_reaction_rule": "G1"}],
+        allow_new_genes=False,
+    )
+    assert rxn.gene_reaction_rule == "G1"
+
+
+# --- guards & extras -------------------------------------------------------
+
+def test_duplicate_reaction_id_errors(model):
+    model.add_reactions([cobra.Reaction("R1")])
+    with pytest.raises(ValueError, match="already exists"):
+        add_reactions_from_equations(model, [{"id": "R1", "equation": "atp_c --> adp_c"}])
+
+
+def test_strict_mets_errors(model):
+    with pytest.raises(ValueError, match="allow_new_mets"):
+        add_reactions_from_equations(
+            model,
+            [{"id": "R1", "equation": "atp_c --> amp_c"}],
+            compartment="c",
+            allow_new_mets=False,
+        )
+
+
+def test_explicit_bounds_override_arrow(model):
+    (rxn,) = add_reactions_from_equations(
+        model,
+        [{"id": "R1", "equation": "atp_c <=> adp_c", "bounds": (0, 50), "name": "myrxn"}],
+    )
+    assert rxn.bounds == (0, 50)
+    assert rxn.name == "myrxn"
+
+
+def test_net_zero_metabolite_dropped(model):
+    """atp_c appears once on each side, nets to zero and is left out of the reaction."""
+    spec = {"id": "R1", "equation": "atp_c + h2o_c --> atp_c + adp_c"}
+    (rxn,) = add_reactions_from_equations(model, [spec])
+    met_ids = {met.id for met in rxn.metabolites}
+    assert "atp_c" not in met_ids
+    assert met_ids == {"h2o_c", "adp_c"}
+
+
+def test_missing_equation_errors(model):
+    with pytest.raises(ValueError, match="missing required 'equation'"):
+        add_reactions_from_equations(model, [{"id": "R1"}])
+
+
+def test_no_arrow_errors(model):
+    with pytest.raises(ValueError, match="No reaction arrow"):
+        add_reactions_from_equations(model, [{"id": "R1", "equation": "atp_c + h2o_c"}])
+
+
+# --- regression: leading-number metabolite name (known_issues.md A1) -------
+
+def test_name_mode_preserves_leading_number_name(model):
+    """A metabolite name that begins with a number isn't misparsed as a coefficient.
+
+    Before the fix the token ``"2 oxoglutarate"`` was parsed as ``(coeff=2, name="oxoglutarate")``
+    silently — corrupting the stoichiometry. The resolver now prefers the full
+    token when it matches an existing metabolite name.
+    """
+    model.add_metabolites([
+        cobra.Metabolite("akg_c", name="2 oxoglutarate", compartment="c"),
+    ])
+    (rxn,) = add_reactions_from_equations(
+        model,
+        [{"id": "R1", "equation": "ATP + 2 oxoglutarate --> ADP"}],
+        mets_by="name",
+        compartment="c",
+    )
+    assert rxn.get_coefficient("akg_c") == -1.0  # not -2.0
+    assert rxn.get_coefficient("atp_c") == -1.0
+
+
+def test_name_mode_coefficient_still_works_without_collision(model):
+    """If the full token doesn't match anything, fall back to coefficient split."""
+    (rxn,) = add_reactions_from_equations(
+        model,
+        [{"id": "R1", "equation": "2 ATP + H2O --> ADP + phosphate"}],
+        mets_by="name",
+        compartment="c",
+    )
+    assert rxn.get_coefficient("atp_c") == -2.0
+
+
+# --- regression: empty-stoichiometry warning (known_issues.md A2) ----------
+
+def test_empty_stoichiometry_warns(model):
+    """A reaction whose terms all cancel emits a UserWarning and ends up with no metabolites."""
+    cancelling = {"id": "R1", "equation": "atp_c --> atp_c"}
+    with pytest.warns(UserWarning, match="no net metabolites"):
+        added = add_reactions_from_equations(model, [cancelling])
+    (rxn,) = added
+    assert len(rxn.metabolites) == 0
+
+
+# --- regression: unknown-compartment warning (known_issues.md B2) ----------
+
+def test_id_mode_unknown_compartment_warns(model):
+    """A typo'd compartment used to silently produce a one-met ghost compartment
+    in id mode (the name/[comp] path used to validate, id mode never did)."""
+    with pytest.warns(UserWarning, match="unregistered compartment 'cyto'"):
+        add_reactions_from_equations(
+            model,
+            [{"id": "R1", "equation": "atp_c --> amp_c"}],
+            compartment="cyto",  # typo for 'c'
+        )
+
+
+def test_name_comp_unknown_compartment_warns(model):
+    """Same defensive check in the name[comp] path when allow_new_mets=True."""
+    with pytest.warns(UserWarning, match="unregistered compartment 'mito'"):
+        add_reactions_from_equations(
+            model,
+            [{"id": "R1", "equation": "ATP[c] --> AMP[mito]"}],
+            mets_by="name",
+        )
diff --git a/tests/test_manipulation_change.py b/tests/test_manipulation_change.py
new file mode 100644
index 0000000..8d54f58
--- /dev/null
+++ b/tests/test_manipulation_change.py
@@ -0,0 +1,93 @@
+"""Tests for raven_python.manipulation.change (changeRxns port)."""
+import cobra
+import pytest
+
+from raven_python.manipulation import add_reactions_from_equations, change_reaction_equations
+
+
+@pytest.fixture
+def model():
+    """Model "t" with A, B, C in compartment "c"; R1 has full metadata, R2 only an equation."""
+    fresh = cobra.Model("t")
+    fresh.add_metabolites(
+        [
+            cobra.Metabolite("a_c", name="A", compartment="c"),
+            cobra.Metabolite("b_c", name="B", compartment="c"),
+            cobra.Metabolite("c_c", name="C", compartment="c"),
+        ]
+    )
+
+    # R1 sets every optional field so tests can check what survives an equation change.
+    r1_spec = {
+        "id": "R1",
+        "equation": "a_c <=> b_c",
+        "name": "first",
+        "bounds": (-30, 70),
+        "gene_reaction_rule": "G1 or G2",
+        "subsystem": "sub",
+    }
+    # R2 relies on defaults for everything except id and equation.
+    r2_spec = {"id": "R2", "equation": "a_c --> c_c"}
+    add_reactions_from_equations(fresh, [r1_spec, r2_spec])
+    return fresh
+
+
+def test_changes_stoichiometry(model):
+    (rxn,) = change_reaction_equations(model, {"R1": "a_c --> 2 c_c"})
+    assert rxn.id == "R1"
+    assert {m.id: rxn.get_coefficient(m.id) for m in rxn.metabolites} == {
+        "a_c": -1.0,
+        "c_c": 2.0,
+    }
+
+
+def test_preserves_other_fields(model):
+    """Changing R1's equation leaves its name, bounds, subsystem and gene set as they were."""
+    original = model.reactions.get_by_id("R1")
+    expected_name, expected_bounds = original.name, original.bounds
+    expected_subsystem = original.subsystem
+    expected_genes = {gene.id for gene in original.genes}
+    change_reaction_equations(model, {"R1": "a_c --> c_c"})
+    updated = model.reactions.get_by_id("R1")
+    assert updated.name == expected_name
+    assert updated.bounds == expected_bounds  # bounds untouched, per RAVEN
+    assert updated.subsystem == expected_subsystem
+    assert {gene.id for gene in updated.genes} == expected_genes
+
+
+def test_preserves_reaction_order(model):
+    order_before = [r.id for r in model.reactions]
+    change_reaction_equations(model, {"R1": "b_c --> c_c"})
+    assert [r.id for r in model.reactions] == order_before
+
+
+def test_bounds_not_changed_by_arrow(model):
+    # R1 starts reversible (-30, 70); a --> arrow must NOT make it irreversible.
+    change_reaction_equations(model, {"R1": "a_c --> b_c"})
+    assert model.reactions.get_by_id("R1").bounds == (-30, 70)
+
+
+def test_name_mode(model):
+    (rxn,) = change_reaction_equations(
+        model, {"R2": "A --> C"}, mets_by="name", compartment="c"
+    )
+    assert {m.id for m in rxn.metabolites} == {"a_c", "c_c"}
+
+
+def test_can_introduce_new_met(model):
+    change_reaction_equations(
+        model, {"R2": "a_c --> d_c"}, compartment="c"
+    )
+    assert "d_c" in model.metabolites
+    assert model.reactions.get_by_id("R2").get_coefficient("d_c") == 1.0
+
+
+def test_unknown_reaction_errors(model):
+    with pytest.raises(ValueError, match="not found"):
+        change_reaction_equations(model, {"NOPE": "a_c --> b_c"})
+
+
+def test_multiple_reactions(model):
+    changed = change_reaction_equations(model, {"R1": "a_c --> c_c", "R2": "b_c --> c_c"})
+    assert [r.id for r in changed] == ["R1", "R2"]
+    assert model.reactions.get_by_id("R2").get_coefficient("b_c") == -1.0
diff --git a/tests/test_manipulation_compartments.py b/tests/test_manipulation_compartments.py
new file mode 100644
index 0000000..4d3fb3b
--- /dev/null
+++ b/tests/test_manipulation_compartments.py
@@ -0,0 +1,139 @@
+"""Tests for manipulation/compartments.py — merge_compartments + copy_to_compartment."""
+from __future__ import annotations
+
+import cobra
+import pytest
+
+from raven_python.manipulation.compartments import copy_to_compartment, merge_compartments
+
+
+def _two_compartment_model() -> cobra.Model:
+    """Toy model: r_c (A_c → B_c, g1), r_m (A_m → B_m, g2) and reversible transport tr_A."""
+    toy = cobra.Model("toy")
+    ids = ("A_c", "A_m", "B_c", "B_m")
+    mets = {i: cobra.Metabolite(i, name=i[0], compartment=i[-1]) for i in ids}
+    toy.add_metabolites(list(mets.values()))
+    # Each spec: (reaction id, lower bound, upper bound, {met id: coefficient}, gene rule).
+    specs = [
+        ("r_c", 0, 1000, {"A_c": -1, "B_c": 1}, "g1"),
+        ("r_m", 0, 1000, {"A_m": -1, "B_m": 1}, "g2"),
+        ("tr_A", -1000, 1000, {"A_c": -1, "A_m": 1}, None),
+    ]
+    for rxn_id, lower, upper, stoich, rule in specs:
+        reaction = cobra.Reaction(rxn_id, lower_bound=lower, upper_bound=upper)
+        reaction.add_metabolites({mets[met_id]: coef for met_id, coef in stoich.items()})
+        if rule:
+            reaction.gene_reaction_rule = rule
+        toy.add_reactions([reaction])
+    return toy
+
+
+# ----------------------------------------------------------------- merge_compartments
+
+def test_merge_compartments_collapses_to_one():
+    """A_c + A_m → A; B_c + B_m → B; transport A_c↔A_m self-cancels and is dropped."""
+    m = _two_compartment_model()
+    merged, deleted, dupes = merge_compartments(m)
+    # Only the base ids survive.
+    assert {x.id for x in merged.metabolites} == {"A", "B"}
+    # The transport reaction collapsed (A → A) and was deleted.
+    assert "tr_A" in deleted
+    # r_c and r_m are now both A → B; one of them gets deduplicated.
+    surviving = {r.id for r in merged.reactions}
+    assert len(surviving & {"r_c", "r_m"}) == 1
+    assert (set(dupes) | (surviving & {"r_c", "r_m"})) == {"r_c", "r_m"}
+
+
+def test_merge_compartments_preserves_gpr_and_subsystem():
+    m = _two_compartment_model()
+    m.reactions.r_c.subsystem = "carbo"
+    merged, _, _ = merge_compartments(m)
+    survivor = next(r for r in merged.reactions if r.id in {"r_c", "r_m"})
+    # The survivor keeps its gene rule + subsystem (cobra may sometimes lose them
+    # through copy; we set them explicitly).
+    assert survivor.gene_reaction_rule in {"g1", "g2"}
+    if survivor.id == "r_c":
+        assert survivor.subsystem == "carbo"
+
+
+def test_merge_compartments_keeps_single_met_reactions_when_asked():
+    """``drop_single_metabolite_reactions`` decides the fate of a reaction left with one met.
+
+    ``sym`` (A_c + H_c → A_m) nets to H alone after the merge: True drops it, False keeps it."""
+    m = cobra.Model("uniport")
+    A_c = cobra.Metabolite("A_c", name="A", compartment="c")
+    A_m = cobra.Metabolite("A_m", name="A", compartment="m")
+    H_c = cobra.Metabolite("H_c", name="H", compartment="c")
+    m.add_metabolites([A_c, A_m, H_c])
+    # Symport-style reaction: A_c + H_c → A_m. After merge: A + H → A, so only H is left.
+    sym = cobra.Reaction("sym", lower_bound=0, upper_bound=1000)
+    sym.add_metabolites({A_c: -1, H_c: -1, A_m: 1})
+    m.add_reactions([sym])
+    merged_drop, deleted_drop, _ = merge_compartments(m, drop_single_metabolite_reactions=True)
+    assert "sym" in deleted_drop
+    merged_keep, deleted_keep, _ = merge_compartments(m, drop_single_metabolite_reactions=False)
+    # With the flag off, sym is not reported as deleted and is present in the merged model.
+    assert "sym" not in deleted_keep
+    assert "sym" in {r.id for r in merged_keep.reactions}
+
+
+def test_merge_compartments_deduplicate_off_keeps_both():
+    m = _two_compartment_model()
+    merged, _, dupes = merge_compartments(m, deduplicate_reactions=False)
+    assert dupes == []
+    assert {"r_c", "r_m"} <= {r.id for r in merged.reactions}
+
+
+# ----------------------------------------------------------------- copy_to_compartment
+
+def test_copy_to_compartment_basic():
+    """Copying r_c into 'p' adds r_c_p with metabolites A_p and B_p and keeps r_c itself."""
+    source = _two_compartment_model()
+    out, new_rxns, new_mets = copy_to_compartment(
+        source, ["r_c"], "p", target_compartment_name="peroxisome")
+    rxn_ids = {r.id for r in out.reactions}
+    met_ids = {x.id for x in out.metabolites}
+    assert "r_c_p" in rxn_ids
+    assert {x.compartment for x in out.reactions.r_c_p.metabolites} == {"p"}
+    assert {"A_p", "B_p"} <= met_ids
+    assert new_rxns == ["r_c_p"]
+    assert set(new_mets) == {"A_p", "B_p"}
+    # Original still there.
+    assert "r_c" in rxn_ids
+
+
+def test_copy_to_compartment_preserves_gpr_and_bounds():
+    m = _two_compartment_model()
+    out, _, _ = copy_to_compartment(m, ["r_c"], "p")
+    new_r = out.reactions.r_c_p
+    assert new_r.gene_reaction_rule == "g1"
+    assert new_r.lower_bound == 0 and new_r.upper_bound == 1000
+
+
+def test_copy_to_compartment_delete_original_is_a_move():
+    m = _two_compartment_model()
+    out, _, _ = copy_to_compartment(m, ["r_c"], "p", delete_original=True)
+    assert "r_c" not in [r.id for r in out.reactions]
+    assert "r_c_p" in [r.id for r in out.reactions]
+
+
+def test_copy_to_compartment_idempotent():
+    """A second identical copy reports no new reactions and leaves a single r_c_p."""
+    base = _two_compartment_model()
+    first, _, _ = copy_to_compartment(base, ["r_c"], "p")
+    second, added_again, _ = copy_to_compartment(first, ["r_c"], "p")
+    assert added_again == []  # nothing added on second call
+    assert [r.id for r in second.reactions].count("r_c_p") == 1
+
+
+def test_copy_to_compartment_unknown_reaction_raises():
+    m = _two_compartment_model()
+    with pytest.raises(ValueError, match="not in model"):
+        copy_to_compartment(m, ["does_not_exist"], "p")
+
+
+def test_copy_to_compartment_custom_suffix():
+    m = _two_compartment_model()
+    out, new_rxns, _ = copy_to_compartment(m, ["r_c"], "p", id_suffix="copy1")
+    assert new_rxns == ["r_c_copy1"]
+    assert "A_copy1" in [x.id for x in out.metabolites]
diff --git a/tests/test_manipulation_expand.py b/tests/test_manipulation_expand.py
new file mode 100644
index 0000000..08cd2f2
--- /dev/null
+++ b/tests/test_manipulation_expand.py
@@ -0,0 +1,288 @@
+"""Tests for expand_model (RAVEN expandModel.m) — splitting isozymes into reactions.
+
+Adopted from geckopy's tests/test_expand.py.
+"""
+import cobra
+
+from raven_python.manipulation import expand_model
+from raven_python.manipulation.expand import _gpr_to_dnf
+
+# --------------------------------------------------------------------------- #
+# DNF conversion (internal helper, worth testing directly)
+# --------------------------------------------------------------------------- #
+
+def _dnf_from_gpr_string(gpr_str: str) -> list[list[str]]:
+    """Parse ``gpr_str`` with cobra's GPR parser and return ``_gpr_to_dnf`` of the result."""
+    # Local import: within this test module GPR is used by this helper only.
+    from cobra.core.gene import GPR
+    return _gpr_to_dnf(GPR.from_string(gpr_str))
+
+
+def test_dnf_empty_gpr():
+    assert _dnf_from_gpr_string("") == []
+
+
+def test_dnf_single_gene():
+    assert _dnf_from_gpr_string("g1") == [["g1"]]
+
+
+def test_dnf_simple_and():
+    assert _dnf_from_gpr_string("g1 and g2") == [["g1", "g2"]]
+
+
+def test_dnf_simple_or():
+    assert _dnf_from_gpr_string("g1 or g2") == [["g1"], ["g2"]]
+
+
+def test_dnf_or_of_ands():
+    assert _dnf_from_gpr_string("(g1 and g2) or (g3 and g4)") == [
+        ["g1", "g2"],
+        ["g3", "g4"],
+    ]
+
+
+def test_dnf_distributes_and_over_or():
+    result = _dnf_from_gpr_string("g1 and (g2 or g3)")
+    assert result == [["g1", "g2"], ["g1", "g3"]]
+
+
+def test_dnf_triple_or():
+    assert _dnf_from_gpr_string("g1 or g2 or g3") == [
+        ["g1"], ["g2"], ["g3"],
+    ]
+
+
+def test_dnf_preserves_gene_order_within_clause():
+    result = _dnf_from_gpr_string("g3 and g1 and g2")
+    assert result == [["g3", "g1", "g2"]]
+
+
+# --------------------------------------------------------------------------- #
+# expand_model
+# --------------------------------------------------------------------------- #
+
+def _build_model(
+    reactions: list[tuple[str, dict[str, float], float, float, str]],
+) -> cobra.Model:
+    """Assemble a model from (rxn_id, {met_id: coef}, lb, ub, gpr) tuples; mets live in "c"."""
+    model = cobra.Model("test")
+    # dict.fromkeys de-duplicates metabolite ids while keeping first-seen order.
+    met_ids = dict.fromkeys(
+        met_id for _, stoich, *_ in reactions for met_id in stoich
+    )
+    mets = {met_id: cobra.Metabolite(met_id, compartment="c") for met_id in met_ids}
+
+    for rxn_id, stoich, lower, upper, rule in reactions:
+        rxn = cobra.Reaction(rxn_id)
+        rxn.lower_bound, rxn.upper_bound = lower, upper
+        rxn.add_metabolites({mets[met_id]: coef for met_id, coef in stoich.items()})
+        # An empty rule string is falsy: such reactions keep their default gene rule.
+        if rule:
+            rxn.gene_reaction_rule = rule
+        model.add_reactions([rxn])
+    return model
+
+
+def test_does_not_expand_reaction_without_gpr():
+    model = _build_model([("r1", {"A": -1.0, "B": 1.0}, 0.0, 1000.0, "")])
+    added = expand_model(model)
+    assert added == []
+    assert "r1" in {r.id for r in model.reactions}
+
+
+def test_does_not_expand_single_and_clause():
+    model = _build_model([
+        ("r1", {"A": -1.0, "B": 1.0}, 0.0, 1000.0, "g1 and g2"),
+    ])
+    added = expand_model(model)
+    assert added == []
+    r1 = model.reactions.get_by_id("r1")
+    assert r1.gene_reaction_rule == "g1 and g2"
+
+
+def test_does_not_expand_single_gene():
+    model = _build_model([("r1", {"A": -1.0, "B": 1.0}, 0.0, 1000.0, "g1")])
+    added = expand_model(model)
+    assert added == []
+    assert model.reactions.get_by_id("r1").gene_reaction_rule == "g1"
+
+
+def test_splits_simple_or_into_two_reactions():
+    model = _build_model([
+        ("r1", {"A": -1.0, "B": 1.0}, 0.0, 1000.0, "g1 or g2"),
+    ])
+    added = expand_model(model)
+
+    assert added == ["r1_EXP_1", "r1_EXP_2"]
+    rxn_ids = {r.id for r in model.reactions}
+    assert "r1" not in rxn_ids
+    assert "r1_EXP_1" in rxn_ids
+    assert "r1_EXP_2" in rxn_ids
+
+    assert model.reactions.get_by_id("r1_EXP_1").gene_reaction_rule == "g1"
+    assert model.reactions.get_by_id("r1_EXP_2").gene_reaction_rule == "g2"
+
+
+def test_splits_or_of_ands():
+    model = _build_model([
+        ("r1", {"A": -1.0, "B": 1.0}, 0.0, 1000.0,
+         "(g1 and g2) or (g3 and g4)"),
+    ])
+    added = expand_model(model)
+
+    assert added == ["r1_EXP_1", "r1_EXP_2"]
+    assert model.reactions.get_by_id("r1_EXP_1").gene_reaction_rule == "g1 and g2"
+    assert model.reactions.get_by_id("r1_EXP_2").gene_reaction_rule == "g3 and g4"
+
+
+def test_distributes_and_over_or():
+    model = _build_model([
+        ("r1", {"A": -1.0, "B": 1.0}, 0.0, 1000.0,
+         "g1 and (g2 or g3)"),
+    ])
+    added = expand_model(model)
+
+    assert added == ["r1_EXP_1", "r1_EXP_2"]
+    assert model.reactions.get_by_id("r1_EXP_1").gene_reaction_rule == "g1 and g2"
+    assert model.reactions.get_by_id("r1_EXP_2").gene_reaction_rule == "g1 and g3"
+
+
+def test_expanded_reactions_inherit_stoichiometry_and_bounds():
+    """Each isozyme copy keeps the parent's bounds (-500, 1500) and stoichiometry."""
+    parent = ("r1", {"A": -1.0, "B": 2.0}, -500.0, 1500.0, "g1 or g2")
+    model = _build_model([parent])
+    expand_model(model)
+
+    for clone_id in ("r1_EXP_1", "r1_EXP_2"):
+        clone = model.reactions.get_by_id(clone_id)
+        assert clone.bounds == (-500.0, 1500.0)
+        stoich = {met.id: coef for met, coef in clone.metabolites.items()}
+        assert stoich == {"A": -1.0, "B": 2.0}
+
+
+def test_expanded_reactions_inherit_name_and_subsystem():
+    model = _build_model([
+        ("r1", {"A": -1.0, "B": 1.0}, 0.0, 1000.0, "g1 or g2"),
+    ])
+    r1 = model.reactions.get_by_id("r1")
+    r1.name = "an isozyme-catalyzed reaction"
+    r1.subsystem = "central metabolism"
+
+    expand_model(model)
+
+    for suffix in ("_EXP_1", "_EXP_2"):
+        rxn = model.reactions.get_by_id(f"r1{suffix}")
+        assert rxn.name == "an isozyme-catalyzed reaction"
+        assert rxn.subsystem == "central metabolism"
+
+
+def test_multiple_reactions_expand_independently():
+    model = _build_model([
+        ("r1", {"A": -1.0, "B": 1.0}, 0.0, 1000.0, "g1 or g2"),
+        ("r2", {"B": -1.0, "C": 1.0}, 0.0, 1000.0, "g3 and g4"),
+        ("r3", {"C": -1.0, "D": 1.0}, 0.0, 1000.0,
+         "(g5 and g6) or g7 or (g8 and g9)"),
+    ])
+    added = expand_model(model)
+
+    assert added == sorted([
+        "r1_EXP_1", "r1_EXP_2",
+        "r3_EXP_1", "r3_EXP_2", "r3_EXP_3",
+    ])
+
+    rxn_ids = {r.id for r in model.reactions}
+    assert "r2" in rxn_ids
+    assert "r1" not in rxn_ids
+    assert "r3" not in rxn_ids
+
+    assert model.reactions.get_by_id("r2").gene_reaction_rule == "g3 and g4"
+    assert model.reactions.get_by_id("r3_EXP_1").gene_reaction_rule == "g5 and g6"
+    assert model.reactions.get_by_id("r3_EXP_2").gene_reaction_rule == "g7"
+    assert model.reactions.get_by_id("r3_EXP_3").gene_reaction_rule == "g8 and g9"
+
+
+def test_expanded_reaction_has_correct_gene_set():
+    model = _build_model([
+        ("r1", {"A": -1.0, "B": 1.0}, 0.0, 1000.0,
+         "(g1 and g2) or (g3 and g4)"),
+    ])
+    expand_model(model)
+
+    r1_1 = model.reactions.get_by_id("r1_EXP_1")
+    assert {g.id for g in r1_1.genes} == {"g1", "g2"}
+
+    r1_2 = model.reactions.get_by_id("r1_EXP_2")
+    assert {g.id for g in r1_2.genes} == {"g3", "g4"}
+
+
+def test_expansion_is_idempotent_in_the_no_op_sense():
+    model = _build_model([
+        ("r1", {"A": -1.0, "B": 1.0}, 0.0, 1000.0, "g1 or g2"),
+        ("r2", {"B": -1.0, "C": 1.0}, 0.0, 1000.0, "g3 and g4"),
+    ])
+    expand_model(model)
+    ids_before = {r.id for r in model.reactions}
+
+    second = expand_model(model)
+    assert second == []
+
+    ids_after = {r.id for r in model.reactions}
+    assert ids_after == ids_before
+
+
+def test_empty_model_is_unchanged():
+    model = cobra.Model("empty")
+    assert expand_model(model) == []
+
+
+# --------------------------------------------------------------------------- #
+# Annotation and notes propagation
+# --------------------------------------------------------------------------- #
+
+def test_expanded_reactions_inherit_annotation_and_notes():
+    model = _build_model([
+        ("r1", {"A": -1.0, "B": 1.0}, 0.0, 1000.0, "g1 or g2"),
+    ])
+    r1 = model.reactions.get_by_id("r1")
+    r1.annotation["ec-code"] = "1.2.3.4"
+    r1.annotation["sbo"] = "SBO:0000176"
+    r1.notes["custom"] = "hello"
+
+    expand_model(model)
+
+    for suffix in ("_EXP_1", "_EXP_2"):
+        rxn = model.reactions.get_by_id(f"r1{suffix}")
+        assert rxn.annotation["ec-code"] == "1.2.3.4"
+        assert rxn.annotation["sbo"] == "SBO:0000176"
+        assert rxn.notes["custom"] == "hello"
+
+
+def test_expanded_reaction_annotation_is_independent_of_parent():
+    """Mutating one expanded reaction's annotation must not affect siblings."""
+    model = _build_model([
+        ("r1", {"A": -1.0, "B": 1.0}, 0.0, 1000.0, "g1 or g2"),
+    ])
+    model.reactions.get_by_id("r1").annotation["ec-code"] = ["1.2.3.4"]
+
+    expand_model(model)
+
+    r1_1 = model.reactions.get_by_id("r1_EXP_1")
+    r1_2 = model.reactions.get_by_id("r1_EXP_2")
+    r1_1.annotation["ec-code"].append("9.9.9.9")
+    assert r1_2.annotation["ec-code"] == ["1.2.3.4"]
+
+
+def test_objective_coefficient_preserved_on_expansion():
+    """Both isozyme copies of an objective reaction end up with objective coefficient 1.0."""
+    model = cobra.Model("o")
+    substrate = cobra.Metabolite("a", compartment="c")
+    product = cobra.Metabolite("b", compartment="c")
+    model.add_metabolites([substrate, product])
+    rxn = cobra.Reaction("r1", lower_bound=0, upper_bound=1000)
+    rxn.add_metabolites({substrate: -1, product: 1})
+    rxn.gene_reaction_rule = "g1 or g2"
+    model.add_reactions([rxn])
+    model.objective = "r1"  # objective sits on the reaction that is about to be split
+    expand_model(model)
+    observed = {r.id: r.objective_coefficient for r in model.reactions}
+    assert observed == {"r1_EXP_1": 1.0, "r1_EXP_2": 1.0}
diff --git a/tests/test_manipulation_irreversible.py b/tests/test_manipulation_irreversible.py
new file mode 100644
index 0000000..e211fa3
--- /dev/null
+++ b/tests/test_manipulation_irreversible.py
@@ -0,0 +1,144 @@
+"""Tests for convert_to_irreversible (RAVEN convertToIrrev.m).
+
+Adopted from geckopy's tests/test_preprocess.py (the convert_to_irreversible subset).
+Exchange reactions are excluded from the split, matching MATLAB behavior.
+"""
+import cobra
+
+from raven_python.manipulation import convert_to_irreversible
+
+
+def _build_model_with_bounds(
+    reactions: list[tuple[str, dict[str, float], float, float]],
+) -> cobra.Model:
+    """Assemble a model from ``(rxn_id, {met_id: coef}, lb, ub)`` tuples."""
+    # One shared Metabolite per id, all in compartment "c", in first-seen order.
+    met_ids = dict.fromkeys(mid for _, stoich, _, _ in reactions for mid in stoich)
+    mets: dict[str, cobra.Metabolite] = {
+        mid: cobra.Metabolite(mid, compartment="c") for mid in met_ids
+    }
+    model = cobra.Model("test")
+
+    for rxn_id, stoich, lb, ub in reactions:
+        rxn = cobra.Reaction(rxn_id)
+        rxn.lower_bound = lb
+        rxn.upper_bound = ub
+        rxn.add_metabolites({mets[mid]: coef for mid, coef in stoich.items()})
+        model.add_reactions([rxn])
+    return model
+
+
+def test_splits_single_reversible_non_exchange():
+    """A reversible internal reaction becomes a forward half plus a mirrored ``_REV`` half."""
+    model = _build_model_with_bounds([
+        ("r1", {"A": -1.0, "B": 1.0}, -500.0, 1000.0),
+    ])
+    new_ids = convert_to_irreversible(model)
+    assert new_ids == ["r1_REV"]
+
+    forward = model.reactions.get_by_id("r1")
+    assert forward.bounds == (0.0, 1000.0)
+    assert {met.id: coef for met, coef in forward.metabolites.items()} == {"A": -1.0, "B": 1.0}
+
+    # The reverse half takes |old lb| as its upper bound and the negated stoichiometry.
+    backward = model.reactions.get_by_id("r1_REV")
+    assert backward.bounds == (0.0, 500.0)
+    assert {met.id: coef for met, coef in backward.metabolites.items()} == {"A": 1.0, "B": -1.0}
+
+
+def test_does_not_split_forward_only_reaction():
+    model = _build_model_with_bounds([
+        ("r1", {"A": -1.0, "B": 1.0}, 0.0, 1000.0),
+    ])
+    added = convert_to_irreversible(model)
+    assert added == []
+    assert "r1_REV" not in {r.id for r in model.reactions}
+
+
+def test_does_not_split_exchange_reaction_even_if_reversible():
+    """Exchange reactions (one metabolite) are explicitly excluded from
+    the irreversibility step in MATLAB, regardless of bounds."""
+    model = _build_model_with_bounds([
+        ("EX_A", {"A": -1.0}, -1000.0, 1000.0),
+    ])
+    added = convert_to_irreversible(model)
+    assert added == []
+    ex = model.reactions.get_by_id("EX_A")
+    assert ex.bounds == (-1000.0, 1000.0)
+
+
+def test_splits_multiple_mixed_reactions():
+    model = _build_model_with_bounds([
+        ("r1", {"A": -1.0, "B": 1.0}, -500.0, 1000.0),   # split
+        ("r2", {"B": -2.0, "C": 3.0}, 0.0, 1000.0),      # forward only
+        ("EX_A", {"A": -1.0}, -1000.0, 1000.0),          # exchange
+        ("r3", {"C": -1.0, "D": 1.0}, -200.0, 200.0),    # split
+    ])
+
+    added = convert_to_irreversible(model)
+    assert added == ["r1_REV", "r3_REV"]
+
+    assert model.reactions.get_by_id("r1").bounds == (0.0, 1000.0)
+    assert model.reactions.get_by_id("r1_REV").bounds == (0.0, 500.0)
+    assert model.reactions.get_by_id("r2").bounds == (0.0, 1000.0)
+    assert model.reactions.get_by_id("EX_A").bounds == (-1000.0, 1000.0)
+    assert model.reactions.get_by_id("r3").bounds == (0.0, 200.0)
+    assert model.reactions.get_by_id("r3_REV").bounds == (0.0, 200.0)
+
+
+def test_reverse_reaction_inherits_gpr():
+    model = _build_model_with_bounds([
+        ("r1", {"A": -1.0, "B": 1.0}, -500.0, 1000.0),
+    ])
+    model.reactions.get_by_id("r1").gene_reaction_rule = "g1 and g2"
+
+    convert_to_irreversible(model)
+
+    rev = model.reactions.get_by_id("r1_REV")
+    assert rev.gene_reaction_rule == "g1 and g2"
+    assert {g.id for g in rev.genes} == {"g1", "g2"}
+
+
+def test_forward_reaction_lb_is_clamped_to_zero():
+    """After splitting, the original reaction should have lb = 0,
+    which is what MATLAB's convertToIrrev does."""
+    model = _build_model_with_bounds([
+        ("r1", {"A": -1.0, "B": 1.0}, -500.0, 1000.0),
+    ])
+    convert_to_irreversible(model)
+    assert model.reactions.get_by_id("r1").lower_bound == 0.0
+
+
+def test_no_reverse_reaction_has_negative_bound():
+    """After conversion, no non-boundary reaction keeps a negative lower bound."""
+    model = _build_model_with_bounds([
+        ("r1", {"A": -1.0, "B": 1.0}, -500.0, 1000.0),
+        ("r2", {"B": -1.0, "C": 1.0}, -1000.0, 0.0),      # reverse-only (ub = 0)
+        ("EX_A", {"A": -1.0}, -1000.0, 1000.0),
+    ])
+    convert_to_irreversible(model)
+    for rxn in model.reactions:
+        if rxn.boundary:
+            continue
+        assert rxn.lower_bound >= 0, f"{rxn.id} still has lb < 0"
+
+
+def test_returns_empty_list_when_nothing_to_split():
+    model = _build_model_with_bounds([
+        ("r1", {"A": -1.0, "B": 1.0}, 0.0, 1000.0),
+        ("EX_A", {"A": -1.0}, -1000.0, 1000.0),
+    ])
+    assert convert_to_irreversible(model) == []
+
+
+def test_conversion_is_idempotent_after_first_pass():
+    """A second convert_to_irreversible call must be a no-op: the first pass
+    already clamped every non-exchange lb to 0, so nothing is left to split
+    and no `_REV_REV` reaction may appear."""
+    model = _build_model_with_bounds([
+        ("r1", {"A": -1.0, "B": 1.0}, -500.0, 1000.0),
+    ])
+    convert_to_irreversible(model)
+    rerun_added = convert_to_irreversible(model)
+    assert rerun_added == []
+    assert all(rxn.id != "r1_REV_REV" for rxn in model.reactions)
diff --git a/tests/test_manipulation_merge.py b/tests/test_manipulation_merge.py
new file mode 100644
index 0000000..a430f6e
--- /dev/null
+++ b/tests/test_manipulation_merge.py
@@ -0,0 +1,136 @@
+"""Tests for merge_models (mergeModels port)."""
+import cobra
+import pytest
+
+from raven_python.manipulation import add_reactions_from_equations, merge_models
+
+
+def _model(mid, mets, reactions):
+    m = cobra.Model(mid)
+    m.add_metabolites(mets)
+    add_reactions_from_equations(m, reactions)
+    return m
+
+
+@pytest.fixture
+def model_a():
+    return _model(
+        "A",
+        [
+            cobra.Metabolite("glc_c", name="Glucose", compartment="c"),
+            cobra.Metabolite("g6p_c", name="G6P", compartment="c"),
+        ],
+        [{"id": "HEX", "equation": "glc_c --> g6p_c", "gene_reaction_rule": "GA"}],
+    )
+
+
+@pytest.fixture
+def model_b():
+    # same Glucose[c] compound but a DIFFERENT id
+    return _model(
+        "B",
+        [
+            cobra.Metabolite("glucose_c", name="Glucose", compartment="c"),
+            cobra.Metabolite("lac_c", name="Lactate", compartment="c"),
+        ],
+        [{"id": "LDH", "equation": "glucose_c --> lac_c", "gene_reaction_rule": "GB"}],
+    )
+
+
+def test_unifies_metabolites_by_name_comp(model_a, model_b):
+    merged = merge_models([model_a, model_b])
+    glucoses = [m for m in merged.metabolites if m.name == "Glucose" and m.compartment == "c"]
+    assert len(glucoses) == 1  # glc_c and glucose_c unified
+    # both reactions reference the same merged Glucose object
+    hex_glc = [m for m in merged.reactions.get_by_id("HEX").metabolites if m.name == "Glucose"][0]
+    ldh_glc = [m for m in merged.reactions.get_by_id("LDH").metabolites if m.name == "Glucose"][0]
+    assert hex_glc is ldh_glc
+
+
+def test_match_by_id_keeps_distinct(model_a, model_b):
+    merged = merge_models([model_a, model_b], match_by="id")
+    glucoses = [m for m in merged.metabolites if m.name == "Glucose"]
+    assert len(glucoses) == 2  # glc_c and glucose_c are distinct by id
+
+
+def test_all_reactions_kept(model_a, model_b):
+    merged = merge_models([model_a, model_b])
+    assert {"HEX", "LDH"} <= {r.id for r in merged.reactions}
+
+
+def test_reaction_id_collision_renamed(model_a):
+    # two models with the same reaction id but different chemistry
+    other = _model(
+        "B",
+        [cobra.Metabolite("glc_c", name="Glucose", compartment="c"),
+         cobra.Metabolite("x_c", name="X", compartment="c")],
+        [{"id": "HEX", "equation": "glc_c --> x_c"}],
+    )
+    merged = merge_models([model_a, other])
+    assert "HEX" in {r.id for r in merged.reactions}
+    assert "HEX_B" in {r.id for r in merged.reactions}  # renamed with source id
+
+
+def test_genes_merged(model_a, model_b):
+    merged = merge_models([model_a, model_b])
+    assert {"GA", "GB"} <= {g.id for g in merged.genes}
+
+
+def test_provenance_recorded(model_a, model_b):
+    merged = merge_models([model_a, model_b])
+    assert merged.reactions.get_by_id("HEX").notes["origin"] == "A"
+    assert merged.reactions.get_by_id("LDH").notes["origin"] == "B"
+    assert merged.genes.get_by_id("GA").notes["origin"] == "A"
+
+
+def test_compartments_preserved(model_a):
+    model_a.compartments = {"c": "cytoplasm"}
+    merged = merge_models([model_a, model_a.copy()])
+    assert merged.compartments.get("c") == "cytoplasm"
+
+
+def test_single_model_returns_copy(model_a):
+    merged = merge_models([model_a])
+    assert merged is not model_a
+    assert {r.id for r in merged.reactions} == {r.id for r in model_a.reactions}
+
+
+def test_three_models(model_a, model_b):
+    c = _model("C", [cobra.Metabolite("co2_c", name="CO2", compartment="c")],
+               [{"id": "SINK", "equation": "co2_c -->"}])
+    merged = merge_models([model_a, model_b, c])
+    assert {"HEX", "LDH", "SINK"} <= {r.id for r in merged.reactions}
+
+
+def test_bad_match_by(model_a, model_b):
+    with pytest.raises(ValueError, match="match_by"):
+        merge_models([model_a, model_b], match_by="oops")
+
+
+# --- regression: formula/charge conflict (known_issues.md B1) --------------
+
+def test_formula_conflict_warns():
+    """Two models sharing a name[comp] but with different formulas warn instead
+    of silently keeping the first."""
+    a = _model("A",
+        [cobra.Metabolite("g1", name="Glucose", formula="C6H12O6", compartment="c")],
+        [{"id": "EX_A", "equation": "g1 -->"}])
+    b = _model("B",
+        [cobra.Metabolite("g2", name="Glucose", formula="C6H12O7", compartment="c")],
+        [{"id": "EX_B", "equation": "g2 -->"}])
+    with pytest.warns(UserWarning, match="different formulas"):
+        merged = merge_models([a, b])
+    # The merge still picks the first-seen — the test asserts the warning fired
+    # and the model survives.
+    assert "EX_A" in merged.reactions and "EX_B" in merged.reactions
+
+
+def test_charge_conflict_warns():
+    a = _model("A",
+        [cobra.Metabolite("g1", name="Glucose", formula="C6H12O6", charge=0, compartment="c")],
+        [{"id": "EX_A", "equation": "g1 -->"}])
+    b = _model("B",
+        [cobra.Metabolite("g2", name="Glucose", formula="C6H12O6", charge=-1, compartment="c")],
+        [{"id": "EX_B", "equation": "g2 -->"}])
+    with pytest.warns(UserWarning, match="different charges"):
+        merge_models([a, b])
diff --git a/tests/test_manipulation_remove.py b/tests/test_manipulation_remove.py
new file mode 100644
index 0000000..2b659b9
--- /dev/null
+++ b/tests/test_manipulation_remove.py
@@ -0,0 +1,97 @@
+"""Tests for raven_python.manipulation.remove (removeMets/removeGenes ports)."""
+import cobra
+import pytest
+
+from raven_python.manipulation import (
+    add_reactions_from_equations,
+    remove_genes,
+    remove_metabolites,
+)
+
+
+@pytest.fixture
+def model():
+    m = cobra.Model("t")
+    m.add_metabolites(
+        [
+            cobra.Metabolite("atp_c", name="ATP", compartment="c"),
+            cobra.Metabolite("atp_m", name="ATP", compartment="m"),
+            cobra.Metabolite("adp_c", name="ADP", compartment="c"),
+            cobra.Metabolite("x_c", name="X", compartment="c"),
+        ]
+    )
+    add_reactions_from_equations(
+        m,
+        [
+            {"id": "R1", "equation": "atp_c --> adp_c", "gene_reaction_rule": "G1 and G2"},
+            {"id": "R2", "equation": "atp_c --> x_c", "gene_reaction_rule": "G3 or G4"},
+            {"id": "R3", "equation": "atp_m --> adp_c"},  # no GPR (spontaneous)
+        ],
+    )
+    return m
+
+
+# --- remove_metabolites ----------------------------------------------------
+
+def test_remove_metabolites_by_id(model):
+    remove_metabolites(model, ["x_c"])
+    assert "x_c" not in model.metabolites
+    # reaction kept, just lost the metabolite
+    assert "R2" in model.reactions
+
+
+def test_remove_metabolites_by_name_across_compartments(model):
+    # "ATP" exists in c and m; by_name removes both at once.
+    remove_metabolites(model, ["ATP"], by_name=True)
+    assert "atp_c" not in model.metabolites
+    assert "atp_m" not in model.metabolites
+    assert "adp_c" in model.metabolites
+
+
+def test_remove_metabolites_destructive(model):
+    remove_metabolites(model, ["adp_c"], destructive=True)
+    # R1 and R3 both produced adp_c -> removed
+    assert "adp_c" not in model.metabolites
+    assert "R1" not in model.reactions and "R3" not in model.reactions
+
+
+# --- remove_genes ----------------------------------------------------------
+
+def test_remove_genes_remove_mode(model):
+    blocked = remove_genes(model, ["G1"], blocked_reactions="remove")
+    # R1 = "G1 and G2": removing G1 breaks the complex -> blocked -> removed
+    assert blocked == ["R1"]
+    assert "R1" not in model.reactions
+    assert "R2" in model.reactions  # OR rule unaffected
+
+
+def test_remove_genes_constrain_mode(model):
+    blocked = remove_genes(model, ["G1"], blocked_reactions="constrain")
+    assert blocked == ["R1"]
+    r1 = model.reactions.get_by_id("R1")
+    assert r1.bounds == (0, 0)  # kept but constrained, per RAVEN default
+    assert r1.gene_reaction_rule == ""
+
+
+def test_remove_genes_keep_mode(model):
+    bounds_before = model.reactions.get_by_id("R1").bounds  # snapshot to prove "untouched"
+    assert remove_genes(model, ["G1"], blocked_reactions="keep") == ["R1"]
+    r1 = model.reactions.get_by_id("R1")
+    assert r1.gene_reaction_rule == ""
+    assert r1.bounds == bounds_before  # left untouched (stronger than merely != (0, 0))
+
+
+def test_remove_genes_or_rule_not_blocked(model):
+    blocked = remove_genes(model, ["G3"], blocked_reactions="remove")
+    # R2 = "G3 or G4": removing G3 leaves G4 -> not blocked
+    assert blocked == []
+    assert model.reactions.get_by_id("R2").gene_reaction_rule == "G4"
+
+
+def test_remove_genes_absent_gene_is_noop(model):
+    assert remove_genes(model, ["NOPE"]) == []
+
+
+def test_remove_genes_bad_policy(model):
+    with pytest.raises(ValueError, match="blocked_reactions"):
+        remove_genes(model, ["G1"], blocked_reactions="explode")
diff --git a/tests/test_manipulation_simplify.py b/tests/test_manipulation_simplify.py
new file mode 100644
index 0000000..586a0c3
--- /dev/null
+++ b/tests/test_manipulation_simplify.py
@@ -0,0 +1,184 @@
+"""Tests for simplifyModel reduction modes."""
+import cobra
+import pytest
+
+from raven_python.manipulation import (
+    add_reactions_from_equations,
+    constrain_reversible_reactions,
+    group_linear_reactions,
+    remove_dead_end_reactions,
+    remove_duplicate_reactions,
+)
+
+# --- remove_dead_end_reactions --------------------------------------------
+
+def test_dead_end_removed():
+    """A reaction whose product nothing consumes is reported removed along with that product."""
+    m = cobra.Model("t")
+    m.add_metabolites([cobra.Metabolite(met_id, compartment="c") for met_id in ("a", "b", "dead")])
+    network = [
+        {"id": "R_in", "equation": " --> a"},
+        {"id": "R1", "equation": "a --> b"},
+        {"id": "R_out", "equation": "b --> "},
+        {"id": "R_dead", "equation": "a --> dead"},  # 'dead' only produced
+    ]
+    add_reactions_from_equations(m, network)
+    removed_rxns, removed_mets = remove_dead_end_reactions(m)
+    assert "R_dead" in removed_rxns
+    assert "dead" in removed_mets
+    # the productive path survives
+    surviving = {rxn.id for rxn in m.reactions}
+    assert {"R_in", "R1", "R_out"} <= surviving
+
+
+def test_dead_end_respects_reserved():
+    m = cobra.Model("t")
+    m.add_metabolites([cobra.Metabolite(x, compartment="c") for x in ("a", "dead")])
+    add_reactions_from_equations(
+        m, [{"id": "R_in", "equation": " --> a"}, {"id": "R_dead", "equation": "a --> dead"}]
+    )
+    removed_rxns, _ = remove_dead_end_reactions(m, reserved=["R_dead"])
+    assert "R_dead" not in removed_rxns
+    assert "R_dead" in {r.id for r in m.reactions}
+
+
+# --- remove_duplicate_reactions -------------------------------------------
+
+def test_duplicates_removed():
+    m = cobra.Model("t")
+    m.add_metabolites([cobra.Metabolite(x, compartment="c") for x in ("a", "b")])
+    add_reactions_from_equations(
+        m,
+        [
+            {"id": "R1", "equation": "a --> b", "bounds": (0, 1000)},
+            {"id": "R2", "equation": "a --> b", "bounds": (0, 1000)},  # duplicate of R1
+            {"id": "R3", "equation": "a --> b", "bounds": (0, 500)},   # different bounds
+        ],
+    )
+    removed = remove_duplicate_reactions(m)
+    assert len(removed) == 1  # one of R1/R2 removed
+    assert {"R3"} <= {r.id for r in m.reactions}
+    assert sum(r.id in ("R1", "R2") for r in m.reactions) == 1
+
+
+def test_duplicates_keep_reserved():
+    m = cobra.Model("t")
+    m.add_metabolites([cobra.Metabolite(x, compartment="c") for x in ("a", "b")])
+    add_reactions_from_equations(
+        m,
+        [
+            {"id": "R1", "equation": "a --> b", "bounds": (0, 1000)},
+            {"id": "R2", "equation": "a --> b", "bounds": (0, 1000)},
+        ],
+    )
+    remove_duplicate_reactions(m, reserved=["R1"])
+    assert "R1" in {r.id for r in m.reactions}  # reserved one kept
+
+
+# --- constrain_reversible_reactions ---------------------------------------
+
+def test_forward_only_reversible_constrained():
+    m = cobra.Model("t")
+    m.add_metabolites([cobra.Metabolite(x, compartment="c") for x in ("a", "b")])
+    add_reactions_from_equations(
+        m,
+        [
+            {"id": "R_in", "equation": " --> a", "bounds": (0, 1000)},
+            {"id": "R1", "equation": "a <=> b", "bounds": (-1000, 1000)},  # can only go fwd
+            {"id": "R_out", "equation": "b --> ", "bounds": (0, 1000)},
+        ],
+    )
+    changed = constrain_reversible_reactions(m)
+    assert "R1" in changed
+    assert m.reactions.get_by_id("R1").lower_bound == 0  # constrained to forward
+
+
+def test_truly_reversible_unchanged():
+    m = cobra.Model("t")
+    m.add_metabolites([cobra.Metabolite(x, compartment="c") for x in ("a", "b")])
+    add_reactions_from_equations(
+        m,
+        [
+            {"id": "R_in", "equation": " <=> a", "bounds": (-1000, 1000)},
+            {"id": "R1", "equation": "a <=> b", "bounds": (-1000, 1000)},
+            {"id": "R_out", "equation": "b <=> ", "bounds": (-1000, 1000)},
+        ],
+    )
+    changed = constrain_reversible_reactions(m)
+    assert "R1" not in changed  # can go both ways
+
+
+# --- group_linear_reactions -----------------------------------------------
+
+def test_linear_chain_merged():
+    m = cobra.Model("t")
+    m.add_metabolites([cobra.Metabolite(x, compartment="c") for x in ("a", "b", "c")])
+    add_reactions_from_equations(
+        m,
+        [
+            {"id": "R1", "equation": "a --> b"},  # b: single producer
+            {"id": "R2", "equation": "b --> c"},  # b: single consumer
+        ],
+    )
+    n_before = len(m.reactions)
+    group_linear_reactions(m)
+    # b is eliminated; R1+R2 merged into one reaction a --> c
+    assert "b" not in m.metabolites
+    assert len(m.reactions) < n_before
+
+
+def test_group_linear_discards_genes():
+    m = cobra.Model("t")
+    m.add_metabolites([cobra.Metabolite(x, compartment="c") for x in ("a", "b", "c")])
+    add_reactions_from_equations(
+        m,
+        [
+            {"id": "R1", "equation": "a --> b", "gene_reaction_rule": "G1"},
+            {"id": "R2", "equation": "b --> c", "gene_reaction_rule": "G2"},
+        ],
+    )
+    group_linear_reactions(m)
+    assert len(m.genes) == 0
+
+
+# --- regression: incremental merge collapses a long chain (known_issues.md D1) ---
+
+def test_group_linear_merges_long_chain_in_one_pass():
+    """A 5-reaction linear chain is still flattened by the incremental scan, which is
+    the correctness property the original O(n²·m) restart-after-merge loop had."""
+    m = cobra.Model("t")
+    m.add_metabolites([cobra.Metabolite(met_id, compartment="c") for met_id in "abcdef"])
+    chain = [
+        {"id": "R_in", "equation": " --> a"},
+        {"id": "R1", "equation": "a --> b"},
+        {"id": "R2", "equation": "b --> c"},
+        {"id": "R3", "equation": "c --> d"},
+        {"id": "R4", "equation": "d --> e"},
+        {"id": "R5", "equation": "e --> f"},
+        {"id": "R_out", "equation": "f --> "},
+    ]
+    add_reactions_from_equations(m, chain)
+    group_linear_reactions(m)
+    internal = {"b", "c", "d", "e"}
+    # None of the chain's internal metabolites may remain in the model.
+    leftover = internal & {met.id for met in m.metabolites}
+    assert leftover == set()
+
+
+# --- regression: NaN FVA on infeasible model (known_issues.md C1) ----------
+
+def test_constrain_reversible_raises_on_infeasible():
+    """An infeasible model produces NaN FVA ranges; the old abs(NaN) < eps
+    check silently treated those as 'truly reversible'. Now raises."""
+    m = cobra.Model("t")
+    a, b = (cobra.Metabolite(x, compartment="c") for x in ("a", "b"))
+    m.add_metabolites([a, b])
+    # Force a contradiction: `forced` must consume a at flux >= 5, while the only
+    # other reaction touching a is r, capped at |flux| <= 1 with nothing to balance b.
+    r = cobra.Reaction("r", lower_bound=-1, upper_bound=1)
+    r.add_metabolites({a: -1, b: 1})
+    forced = cobra.Reaction("forced", lower_bound=5, upper_bound=10)  # lb > 0: must carry flux
+    forced.add_metabolites({a: -1})
+    m.add_reactions([r, forced])
+    with pytest.raises(RuntimeError, match="infeasible"):
+        constrain_reversible_reactions(m)
diff --git a/tests/test_manipulation_transfer.py b/tests/test_manipulation_transfer.py
new file mode 100644
index 0000000..61c2ac9
--- /dev/null
+++ b/tests/test_manipulation_transfer.py
@@ -0,0 +1,137 @@
+"""Tests for add_reactions_from_model (addRxnsGenesMets port)."""
+import cobra
+import pytest
+
+from raven_python.manipulation import add_reactions_from_equations, add_reactions_from_model
+
+
+@pytest.fixture
+def draft():
+    m = cobra.Model("draft")
+    m.add_metabolites(
+        [cobra.Metabolite("glc_c", name="Glucose", formula="C6H12O6", compartment="c")]
+    )
+    # an existing reaction so glc_c is in use and we have an id to test skipping
+    add_reactions_from_equations(m, [{"id": "R_existing", "equation": "glc_c <=>"}])
+    return m
+
+
+@pytest.fixture
+def source():
+    m = cobra.Model("source")
+    m.add_metabolites(
+        [
+            # same name[comp] as draft's glc_c but a DIFFERENT id
+            cobra.Metabolite("glucose_c", name="Glucose", formula="C6H12O6", compartment="c"),
+            cobra.Metabolite("atp_c", name="ATP", formula="C10H16N5O13P3", charge=-4, compartment="c"),
+            cobra.Metabolite("g6p_c", name="G6P", formula="C6H13O9P", compartment="c"),
+        ]
+    )
+    add_reactions_from_equations(
+        m,
+        [
+            {
+                "id": "HEX",
+                "equation": "glucose_c + atp_c --> g6p_c",
+                "name": "hexokinase",
+                "bounds": (0, 1000),
+                "gene_reaction_rule": "G1",
+                "subsystem": "glycolysis",
+            },
+            {"id": "R_existing", "equation": "glucose_c <=>"},  # id already in draft
+        ],
+    )
+    return m
+
+
+def test_metabolite_matched_by_name_comp_not_id(draft, source):
+    add_reactions_from_model(draft, source, "HEX")
+    hex_rxn = draft.reactions.get_by_id("HEX")
+    # Glucose reused from the draft (id glc_c), NOT the source's glucose_c
+    assert "glc_c" in {m.id for m in hex_rxn.metabolites}
+    assert "glucose_c" not in draft.metabolites
+
+
+def test_new_metabolites_added_with_metadata(draft, source):
+    add_reactions_from_model(draft, source, "HEX")
+    assert "atp_c" in draft.metabolites and "g6p_c" in draft.metabolites
+    assert draft.metabolites.get_by_id("g6p_c").formula == "C6H13O9P"
+    assert draft.metabolites.get_by_id("atp_c").charge == -4
+
+
+def test_reaction_copied_with_bounds_and_name(draft, source):
+    (rxn,) = add_reactions_from_model(draft, source, "HEX")
+    assert rxn.id == "HEX"
+    assert rxn.name == "hexokinase"
+    assert rxn.bounds == (0, 1000)
+    assert rxn.subsystem == "glycolysis"
+    assert {m.id: rxn.get_coefficient(m.id) for m in rxn.metabolites} == {
+        "glc_c": -1.0,
+        "atp_c": -1.0,
+        "g6p_c": 1.0,
+    }
+
+
+def test_genes_true_copies_gpr_and_creates_genes(draft, source):
+    add_reactions_from_model(draft, source, "HEX", genes=True)
+    assert draft.reactions.get_by_id("HEX").gene_reaction_rule == "G1"
+    assert "G1" in draft.genes
+
+
+def test_genes_false_no_gpr(draft, source):
+    add_reactions_from_model(draft, source, "HEX", genes=False)
+    assert draft.reactions.get_by_id("HEX").gene_reaction_rule == ""
+
+
+def test_genes_string_override(draft, source):
+    add_reactions_from_model(draft, source, "HEX", genes="G9 or G10")
+    assert draft.reactions.get_by_id("HEX").gene_reaction_rule == "G9 or G10"
+
+
+def test_skips_already_present(draft, source):
+    added = add_reactions_from_model(draft, source, ["HEX", "R_existing"])
+    assert [r.id for r in added] == ["HEX"]
+
+
+def test_all_present_raises(draft, source):
+    with pytest.raises(ValueError, match="already in the model"):
+        add_reactions_from_model(draft, source, "R_existing")
+
+
+def test_unknown_source_reaction_raises(draft, source):
+    with pytest.raises(ValueError, match="not found in the source model"):
+        add_reactions_from_model(draft, source, "NOPE")
+
+
+def test_note_and_confidence_stored(draft, source):
+    (rxn,) = add_reactions_from_model(draft, source, "HEX", note="from KEGG", confidence=2)
+    assert rxn.notes["note"] == "from KEGG"
+    assert rxn.notes["confidence_score"] == 2
+
+
+# --- regression: intra-batch met-id minting collision (known_issues.md A3) ---
+
+def test_intra_batch_id_minting_unique():
+    """Two source mets whose ids both collide with the draft and whose name[comp]
+    differs both get routed through new-id minting. The fix tracks ids minted in
+    the current batch so the two don't collapse to the same generated id."""
+    draft = cobra.Model("draft")
+    draft.add_metabolites([
+        cobra.Metabolite("atp_c", name="ATP-draft", compartment="c"),
+        cobra.Metabolite("adp_c", name="ADP-draft", compartment="c"),
+    ])
+    source = cobra.Model("source")
+    source.add_metabolites([
+        cobra.Metabolite("atp_c", name="ATP-source", compartment="c"),
+        cobra.Metabolite("adp_c", name="ADP-source", compartment="c"),
+    ])
+    rxn = cobra.Reaction("R1", lower_bound=0, upper_bound=1000)
+    source.add_reactions([rxn])
+    rxn.add_metabolites({
+        source.metabolites.get_by_id("atp_c"): -1,
+        source.metabolites.get_by_id("adp_c"): 1,
+    })
+    add_reactions_from_model(draft, source, "R1")
+    # Both source mets minted distinct ids (m1 and m2) — not a collision.
+    new_ids = sorted(m.id for m in draft.metabolites if m.id not in ("atp_c", "adp_c"))
+    assert len(new_ids) == 2 and len(set(new_ids)) == 2
diff --git a/tests/test_manipulation_transport.py b/tests/test_manipulation_transport.py
new file mode 100644
index 0000000..e8fb2b6
--- /dev/null
+++ b/tests/test_manipulation_transport.py
@@ -0,0 +1,98 @@
+"""Tests for add_transport_reactions (addTransport port)."""
+import cobra
+import pytest
+
+from raven_python.manipulation import add_transport_reactions
+
+
+@pytest.fixture
+def model():
+    m = cobra.Model("t")
+    m.compartments = {"c": "cytoplasm", "m": "mitochondrion", "e": "extracellular"}
+    m.add_metabolites(
+        [
+            cobra.Metabolite("atp_c", name="ATP", formula="C10H16N5O13P3", charge=-4, compartment="c"),
+            cobra.Metabolite("h2o_c", name="H2O", formula="H2O", compartment="c"),
+            cobra.Metabolite("atp_m", name="ATP", compartment="m"),  # exists in m
+        ]
+    )
+    return m
+
+
+def test_basic_transport_to_existing(model):
+    added = add_transport_reactions(model, "c", "m", ["ATP"])
+    assert len(added) == 1
+    rxn = added[0]
+    assert rxn.id == "tr_0001"
+    assert rxn.name == "ATP transport, cytoplasm-mitochondrion"
+    assert {m.id: rxn.get_coefficient(m.id) for m in rxn.metabolites} == {
+        "atp_c": -1.0,
+        "atp_m": 1.0,
+    }
+    assert rxn.reversibility is True
+
+
+def test_only_to_existing_skips_missing(model):
+    # H2O is not in m; with only_to_existing (default) it's skipped
+    added = add_transport_reactions(model, "c", "m", ["ATP", "H2O"])
+    assert [r.id for r in added] == ["tr_0001"]  # only ATP
+
+
+def test_creates_missing_target_metabolite(model):
+    added = add_transport_reactions(
+        model, "c", "m", ["H2O"], only_to_existing=False
+    )
+    assert len(added) == 1
+    new = [mt for mt in model.metabolites if mt.name == "H2O" and mt.compartment == "m"]
+    assert len(new) == 1
+    assert new[0].formula == "H2O"  # copied from source
+
+
+def test_copies_formula_and_charge(model):
+    add_transport_reactions(model, "c", "e", ["ATP"], only_to_existing=False)
+    atp_e = [met for met in model.metabolites if (met.name, met.compartment) == ("ATP", "e")][0]
+    assert atp_e.formula == "C10H16N5O13P3"
+    assert atp_e.charge == -4
+
+
+def test_irreversible(model):
+    (rxn,) = add_transport_reactions(model, "c", "m", ["ATP"], reversible=False)
+    assert rxn.lower_bound == 0
+    assert rxn.reversibility is False
+
+
+def test_default_all_metabolites_in_from(model):
+    # default metabolite_names = all in c (ATP, H2O); to m, only_to_existing -> only ATP
+    added = add_transport_reactions(model, "c", "m")
+    assert [r.id for r in added] == ["tr_0001"]
+
+
+def test_multiple_target_compartments_and_sequential_ids(model):
+    added = add_transport_reactions(
+        model, "c", ["m", "e"], ["ATP"], only_to_existing=False
+    )
+    assert [r.id for r in added] == ["tr_0001", "tr_0002"]
+
+
+def test_unknown_compartment_raises(model):
+    with pytest.raises(ValueError, match="not in the model"):
+        add_transport_reactions(model, "x", "m", ["ATP"])
+
+
+def test_unknown_metabolite_raises(model):
+    with pytest.raises(ValueError, match="not found in compartment"):
+        add_transport_reactions(model, "c", "m", ["NOPE"])
+
+
+# --- regression: duplicate name in compartment (known_issues.md A4) --------
+
+def test_duplicate_name_in_source_compartment_warns(model):
+    """Two source mets sharing a name in the same compartment warn instead of
+    silently collapsing — previously one was dropped from the lookup dict."""
+    model.add_metabolites([
+        cobra.Metabolite("h2o2_c", name="H2O", compartment="c"),  # duplicate name
+    ])
+    with pytest.warns(UserWarning, match="Multiple metabolites named 'H2O'"):
+        added = add_transport_reactions(model, "c", "m", ["H2O"], only_to_existing=False)
+    # Transport still works (uses the first match) — the warning is the signal.
+    assert len(added) == 1
diff --git a/tests/test_omics_hpa.py b/tests/test_omics_hpa.py
new file mode 100644
index 0000000..8b2c124
--- /dev/null
+++ b/tests/test_omics_hpa.py
@@ -0,0 +1,154 @@
+"""Tests for omics/hpa.py — HPA parsing + score adapters (Phase 5)."""
+from __future__ import annotations
+
+from textwrap import dedent
+
+import pytest
+
+from raven_python.omics import (
+    HPA_LEVEL_SCORES,
+    HPAData,
+    HPARnaData,
+    hpa_gene_scores,
+    parse_hpa,
+    parse_hpa_rna,
+    rna_gene_scores,
+)
+
+
+@pytest.fixture
+def hpa_tsv(tmp_path):
+    """Minimal HPA proteomics TSV: three genes, two tissues, three cell types (six rows)."""
+    p = tmp_path / "hpa.tsv"
+    p.write_text(dedent("""\
+        Gene\tGene name\tTissue\tCell type\tLevel\tReliability
+        ENSG1\tGeneA\tliver\thepatocytes\tHigh\tEnhanced
+        ENSG1\tGeneA\tliver\tbile duct cells\tLow\tApproved
+        ENSG1\tGeneA\tkidney\ttubular cells\tNot detected\tApproved
+        ENSG2\tGeneB\tliver\thepatocytes\tMedium\tSupported
+        ENSG2\tGeneB\tkidney\ttubular cells\tHigh\tEnhanced
+        ENSG3\tGeneC\tliver\thepatocytes\tMixed\tUncertain
+    """))
+    return p
+
+
+@pytest.fixture
+def rna_tsv(tmp_path):
+    """Tidy HPA-style RNA-seq TSV (Gene/Gene name/Tissue/TPM): two genes × two tissues."""
+    p = tmp_path / "rna.tsv"
+    p.write_text(dedent("""\
+        Gene\tGene name\tTissue\tTPM
+        ENSG1\tGeneA\tliver\t100.0
+        ENSG1\tGeneA\tkidney\t10.0
+        ENSG2\tGeneB\tliver\t5.0
+        ENSG2\tGeneB\tkidney\t50.0
+    """))
+    return p
+
+
+# ---------------------------------------------------------------------- parsers
+
+def test_parse_hpa_basic(hpa_tsv):
+    parsed = parse_hpa(hpa_tsv)
+    assert isinstance(parsed, HPAData)
+    assert parsed.tissues() == ["kidney", "liver"]
+    assert parsed.celltypes("liver") == ["bile duct cells", "hepatocytes"]
+    # The frame keeps one row per (gene, tissue, celltype) input line.
+    assert len(parsed.df) == 6
+    expected_columns = {"gene_id", "gene_name", "tissue", "celltype", "level", "reliability"}
+    assert set(parsed.df.columns) == expected_columns
+
+
+def test_parse_hpa_missing_columns(tmp_path):
+    bad = tmp_path / "bad.tsv"
+    bad.write_text("Gene\tTissue\nx\ty\n")  # header carries only two columns
+    with pytest.raises(ValueError, match="missing HPA columns"):
+        parse_hpa(bad)
+
+
+def test_parse_hpa_rna_tidy(rna_tsv):
+    parsed = parse_hpa_rna(rna_tsv)
+    assert isinstance(parsed, HPARnaData)
+    assert parsed.tissues() == ["kidney", "liver"]
+    assert parsed.expression("liver") == {"ENSG1": 100.0, "ENSG2": 5.0}
+
+
+def test_parse_hpa_rna_wide_layout(tmp_path):
+    """Wide input (one TPM column per tissue, the older layout) is melted into the tidy form."""
+    wide = tmp_path / "rna_wide.tsv"
+    wide.write_text(dedent("""\
+        Gene\tGene name\tliver\tkidney
+        ENSG1\tGeneA\t100\t10
+        ENSG2\tGeneB\t5\t50
+    """))
+    parsed = parse_hpa_rna(wide)
+    assert parsed.expression("liver") == {"ENSG1": 100.0, "ENSG2": 5.0}
+    assert parsed.expression("kidney") == {"ENSG1": 10.0, "ENSG2": 50.0}
+
+
+# ---------------------------------------------------------------------- scoring
+
+def test_hpa_gene_scores_best_picks_max(hpa_tsv):
+    """``best`` keeps the top cell-type score: liver ENSG1 is High + Low, so High (20) wins."""
+    scores = hpa_gene_scores(parse_hpa(hpa_tsv), "liver", multiple_celltype="best")
+    assert scores["ENSG1"] == HPA_LEVEL_SCORES["High"]      # 20
+    assert scores["ENSG2"] == HPA_LEVEL_SCORES["Medium"]    # 15
+
+
+def test_hpa_gene_scores_average(hpa_tsv):
+    """``average`` takes the mean over cell types: liver ENSG1 = mean(20, 10) = 15."""
+    scores = hpa_gene_scores(parse_hpa(hpa_tsv), "liver", multiple_celltype="average")
+    assert scores["ENSG1"] == pytest.approx(15.0)
+
+
+def test_hpa_gene_scores_celltype_filter(hpa_tsv):
+    """With ``celltype`` given, only that cell type's score is returned."""
+    scores = hpa_gene_scores(parse_hpa(hpa_tsv), "liver", celltype="bile duct cells")
+    assert scores == {"ENSG1": HPA_LEVEL_SCORES["Low"]}     # 10; GeneB has no bile-duct row
+
+
+def test_hpa_gene_scores_unknown_level_omitted(hpa_tsv):
+    """A level missing from HPA_LEVEL_SCORES ('Mixed', 'N/A') is dropped, not scored -inf."""
+    scores = hpa_gene_scores(parse_hpa(hpa_tsv), "liver")
+    assert "ENSG3" not in scores    # ENSG3's single liver row carries level='Mixed'
+
+
+def test_hpa_gene_scores_unknown_celltype_returns_empty(hpa_tsv):
+    scores = hpa_gene_scores(parse_hpa(hpa_tsv), "liver", celltype="cardiomyocytes")
+    assert scores == {}  # no fixture row has this cell type
+
+
+def test_hpa_gene_scores_custom_level_table(hpa_tsv):
+    """A caller-supplied ``level_scores`` mapping replaces the default one."""
+    custom = {"High": 1.0, "Medium": 0.5, "Low": 0.1, "Not detected": -1.0}
+    scores = hpa_gene_scores(parse_hpa(hpa_tsv), "liver", level_scores=custom)
+    assert scores == {"ENSG1": 1.0, "ENSG2": 0.5}
+
+
+def test_rna_gene_scores_against_per_gene_mean(rna_tsv):
+    """With no explicit reference, each gene is compared with its own cross-tissue mean
+    (the RAVEN arrayData.threshold default).
+
+    ENSG1: liver TPM 100 vs mean 55   → log(100/55) > 0 → positive score.
+    ENSG2: liver TPM 5   vs mean 27.5 → log(5/27.5) < 0 → negative score.
+    """
+    scores = rna_gene_scores(parse_hpa_rna(rna_tsv), "liver")
+    assert scores["ENSG1"] > 0 and scores["ENSG2"] < 0
+
+
+def test_rna_gene_scores_scalar_reference(rna_tsv):
+    """A scalar ``reference`` is applied to every gene (reusing gene_scores_from_expression)."""
+    scores = rna_gene_scores(parse_hpa_rna(rna_tsv), "liver", reference=10.0)
+    # ENSG1: TPM 100 vs ref 10 → ln(10)*5 ≈ 11.5, which is clamped to max_score=10.
+    assert scores["ENSG1"] == 10.0
+    assert scores["ENSG2"] < 0  # TPM 5 is below the reference of 10
+
+
+def test_rna_gene_scores_unknown_tissue_raises(rna_tsv):
+    with pytest.raises(ValueError, match="not in dataset"):  # fixture only has liver and kidney
+        rna_gene_scores(parse_hpa_rna(rna_tsv), "spleen")
+
+
+def test_hpa_gene_scores_invalid_multiple_celltype(hpa_tsv):
+    with pytest.raises(ValueError, match="multiple_celltype"):  # "weighted" is not a supported mode
+        hpa_gene_scores(parse_hpa(hpa_tsv), "liver", multiple_celltype="weighted")
diff --git a/tests/test_parameters.py b/tests/test_parameters.py
new file mode 100644
index 0000000..c0ab06c
--- /dev/null
+++ b/tests/test_parameters.py
@@ -0,0 +1,60 @@
+"""Tests for set_variance_bounds (the var mode of setParam)."""
+import cobra
+import pytest
+
+from raven_python.manipulation import add_reactions_from_equations, set_variance_bounds
+
+
+@pytest.fixture
+def model():
+    """Toy model: metabolites a_c/b_c in compartment c and two identical reactions R1, R2."""
+    toy = cobra.Model("t")
+    metabolites = [
+        cobra.Metabolite(met_id, compartment="c") for met_id in ("a_c", "b_c")
+    ]
+    toy.add_metabolites(metabolites)
+    equations = [
+        {"id": "R1", "equation": "a_c <=> b_c"},
+        {"id": "R2", "equation": "a_c <=> b_c"},
+    ]
+    add_reactions_from_equations(toy, equations)
+    return toy
+
+
+def test_band_positive(model):
+    set_variance_bounds(model, "R1", 100, 5)  # expected band: 97.5 .. 102.5
+    lower, upper = model.reactions.get_by_id("R1").bounds
+    assert lower == pytest.approx(97.5)
+    assert upper == pytest.approx(102.5)
+
+
+def test_band_negative_is_ordered(model):
+    set_variance_bounds(model, "R1", -100, 5)
+    lower, upper = model.reactions.get_by_id("R1").bounds
+    assert lower == pytest.approx(-102.5)
+    assert upper == pytest.approx(-97.5)
+    assert lower <= upper  # the band must not invert for a negative centre
+
+
+def test_broadcast_scalar(model):
+    set_variance_bounds(model, ["R1", "R2"], 50, 10)  # one scalar value for both reactions
+    for reaction_id in ["R1", "R2"]:
+        lower, upper = model.reactions.get_by_id(reaction_id).bounds
+        assert lower == pytest.approx(47.5)
+        assert upper == pytest.approx(52.5)
+
+
+def test_per_reaction_values(model):
+    set_variance_bounds(model, ["R1", "R2"], [100, 200], 0)  # zero variance: lb == ub == value
+    assert model.reactions.get_by_id("R1").bounds == pytest.approx((100, 100))
+    assert model.reactions.get_by_id("R2").bounds == pytest.approx((200, 200))
+
+
+def test_length_mismatch_raises(model):
+    with pytest.raises(ValueError, match="to match the reactions"):  # 3 values for 2 reactions
+        set_variance_bounds(model, ["R1", "R2"], [1, 2, 3], 5)
+
+
+def test_unknown_reaction_raises(model):
+    with pytest.raises(ValueError, match="not found"):  # the fixture only defines R1 and R2
+        set_variance_bounds(model, "NOPE", 1, 5)
diff --git a/tests/test_reconstruction_blast.py b/tests/test_reconstruction_blast.py
new file mode 100644
index 0000000..32af556
--- /dev/null
+++ b/tests/test_reconstruction_blast.py
@@ -0,0 +1,78 @@
+"""Tests for run_blast / run_diamond / blast_from_table + the tabular parser."""
+import shutil
+
+import pandas as pd
+import pytest
+
+from raven_python.reconstruction.homology import HIT_COLUMNS, blast_from_table, run_blast
+from raven_python.reconstruction.homology.blast import _parse_tabular
+
+_SEQ = (
+    "MSTNPKPQRKTKRNTNRRPQDVKFPGGGQIVGGVYLLPRRGPRLGVRATRKTSERSQPRGRRQPIPKARRPEGRTWAQPGYPWPLYGNEGCGWAGWLLSPRG"
+)
+
+
+def test_parse_tabular_csv():
+    raw = "tg1,ng1,1e-50,99.0,120,250.0,99.5\ntg2,ng2,0.0,100.0,200,400.0,100.0\n"
+    hits = _parse_tabular(raw, "templ", "org", sep=",")
+    assert list(hits.columns) == HIT_COLUMNS
+    assert (hits.iloc[0].from_gene, hits.iloc[0].to_gene) == ("tg1", "ng1")
+    assert (hits.iloc[0].from_id, hits.iloc[0].to_id) == ("templ", "org")  # ids come from the call args
+    assert (hits.iloc[1].identity, hits.iloc[1].align_len) == (100.0, 200)
+
+
+def test_parse_tabular_empty():
+    assert _parse_tabular("", "a", "b", sep=",").empty  # empty text -> empty frame, no exception
+
+
+def test_blast_from_table_dataframe_roundtrip():
+    """A DataFrame carrying exactly HIT_COLUMNS is accepted and comes back with one row."""
+    row = ["templ", "org", "tg1", "ng1", 0.0, 100.0, 100, 200.0, 100.0]
+    # Same construction as the CSV tests: a single hit row labelled with HIT_COLUMNS.
+    df = pd.DataFrame([row], columns=HIT_COLUMNS)
+    out = blast_from_table(df)
+    assert list(out.columns) == HIT_COLUMNS
+    assert len(out) == 1
+
+
+def test_blast_from_table_csv(tmp_path):
+    csv_path = tmp_path / "hits.csv"
+    row = ["templ", "org", "tg1", "ng1", 0.0, 100.0, 100, 200.0, 100.0]
+    table = pd.DataFrame([row], columns=HIT_COLUMNS)
+    table.to_csv(csv_path, index=False)
+    loaded = blast_from_table(csv_path)
+    assert loaded.iloc[0].from_gene == "tg1"
+
+
+def test_blast_from_table_missing_columns():
+    with pytest.raises(ValueError, match="missing required columns"):  # frame has only from_id
+        blast_from_table(pd.DataFrame({"from_id": ["x"]}))
+
+
+def test_blast_from_table_csv_numeric_gene_ids_stay_str(tmp_path):
+    """Gene ids made only of digits (e.g. Entrez) must load as str to match model gene ids."""
+    csv_path = tmp_path / "hits.csv"
+    row = ["templ", "org", 125, 4790, 0.0, 100.0, 100, 200.0, 100.0]  # integer gene ids
+    pd.DataFrame([row], columns=HIT_COLUMNS).to_csv(csv_path, index=False)
+    first = blast_from_table(csv_path).iloc[0]
+    assert first.from_gene == "125"
+    assert first.to_gene == "4790"
+
+
+@pytest.mark.skipif(
+    not (shutil.which("blastp") and shutil.which("makeblastdb")), reason="BLAST+ not installed"
+)
+def test_run_blast_integration(tmp_path):
+    org = tmp_path / "org.faa"
+    ref = tmp_path / "templ.faa"
+    org.write_text(f">ngene\n{_SEQ}\n")
+    ref.write_text(f">tgene\n{_SEQ}\n")  # identical sequence -> strong reciprocal hit
+
+    hits = run_blast("org", org, ["templ"], [ref])
+    assert list(hits.columns) == HIT_COLUMNS
+    assert not hits.empty
+    # hits exist in both directions (templ -> org and org -> templ)
+    assert {("templ", "org"), ("org", "templ")} <= set(zip(hits.from_id, hits.to_id, strict=False))
+    # the template gene tgene hits the organism gene ngene
+    fwd = hits[(hits.from_gene == "tgene") & (hits.to_gene == "ngene")]
+    assert not fwd.empty
diff --git a/tests/test_reconstruction_homology.py b/tests/test_reconstruction_homology.py
new file mode 100644
index 0000000..63ed72f
--- /dev/null
+++ b/tests/test_reconstruction_homology.py
@@ -0,0 +1,138 @@
+"""Tests for homology reconstruction core (make_ortholog_hits + get_model_from_homology)."""
+import cobra
+import pandas as pd
+import pytest
+
+from raven_python.manipulation import add_reactions_from_equations
+from raven_python.reconstruction.homology import (
+    HIT_COLUMNS,
+    get_model_from_homology,
+    make_ortholog_hits,
+)
+
+# --- make_ortholog_hits ----------------------------------------------------
+
+def test_make_ortholog_hits_bidirectional():
+    pairs = [("tA", "nA"), ("tB", "nB")]
+    table = make_ortholog_hits(pairs, "template", "neworg")
+    assert list(table.columns) == HIT_COLUMNS
+    assert len(table) == 4  # each of the 2 pairs appears once per direction
+    forward = table[(table.from_id == "template") & (table.from_gene == "tA")]
+    backward = table[(table.from_id == "neworg") & (table.from_gene == "nA")]
+    assert (forward.iloc[0].to_gene, backward.iloc[0].to_gene) == ("nA", "tA")
+
+
+def test_make_ortholog_hits_empty_raises():
+    with pytest.raises(ValueError, match="empty"):  # an empty pair list is rejected
+        make_ortholog_hits([], "t", "n")
+
+
+# --- template model fixture ------------------------------------------------
+
+def _template():
+    """Template GEM with a single-gene, an isozyme (OR) and a complex (AND) reaction."""
+    template = cobra.Model("templateGEM")
+    template.compartments = {"c": "cytoplasm"}
+    mets = [cobra.Metabolite(x, name=x.upper(), compartment="c") for x in ("a", "b", "d")]
+    template.add_metabolites(mets)
+    reactions = [
+        {"id": "R_single", "equation": "a --> b", "gene_reaction_rule": "tg1"},
+        {"id": "R_iso", "equation": "b --> d", "gene_reaction_rule": "tg2 or tg3"},
+        {"id": "R_cplx", "equation": "a --> d", "gene_reaction_rule": "tg4 and tg5"},
+    ]
+    add_reactions_from_equations(template, reactions)
+    return template
+
+
+# --- one-to-one transfer ---------------------------------------------------
+
+def test_single_gene_reaction_transferred():
+    template = _template()
+    hits = make_ortholog_hits([("tg1", "ng1")], "templateGEM", "bug")
+    draft = get_model_from_homology([template], hits, "bug").model
+    assert draft.id == "bug"
+    assert any(rxn.id == "R_single" for rxn in draft.reactions)
+    assert draft.reactions.get_by_id("R_single").gene_reaction_rule == "ng1"  # tg1 -> its ortholog
+
+
+def test_unsupported_reaction_dropped():
+    template = _template()
+    hits = make_ortholog_hits([("tg1", "ng1")], "templateGEM", "bug")  # tg1 is the only mapped gene
+    draft = get_model_from_homology([template], hits, "bug").model
+    # Neither R_iso (tg2/tg3) nor R_cplx (tg4/tg5) has an ortholog, so both are dropped.
+    assert {rxn.id for rxn in draft.reactions} == {"R_single"}
+
+
+def test_one_to_many_orthologs_become_or():
+    template = _template()
+    pairs = [("tg1", "ngA"), ("tg1", "ngB")]  # one template gene, two orthologs
+    draft = get_model_from_homology([template], make_ortholog_hits(pairs, "templateGEM", "bug"), "bug").model
+    assert draft.reactions.get_by_id("R_single").gene_reaction_rule == "ngA or ngB"
+
+
+# --- isozyme (OR) handling -------------------------------------------------
+
+def test_isozyme_branch_without_ortholog_dropped():
+    template = _template()
+    # Of the two isozymes of R_iso (tg2 or tg3) only tg2 has an ortholog.
+    hits = make_ortholog_hits([("tg2", "ng2")], "templateGEM", "bug")
+    assert get_model_from_homology([template], hits, "bug").model.reactions.get_by_id("R_iso").gene_reaction_rule == "ng2"
+
+
+# --- complex (AND) policies ------------------------------------------------
+
+def _complex_hits():
+    # Hits in which only tg4 of the template's "tg4 and tg5" complex (R_cplx) has an ortholog.
+    return make_ortholog_hits([("tg4", "ng4")], "templateGEM", "bug")
+
+
+def test_complex_policy_flag_keeps_old_marker():
+    draft = get_model_from_homology([_template()], _complex_hits(), "bug", complex_policy="flag").model
+    rule = draft.reactions.get_by_id("R_cplx").gene_reaction_rule
+    assert all(token in rule for token in ("ng4", "OLD_templateGEM_tg5", " and "))
+
+
+def test_complex_policy_keep_drops_unmapped_subunit():
+    draft = get_model_from_homology([_template()], _complex_hits(), "bug", complex_policy="keep").model
+    assert draft.reactions.get_by_id("R_cplx").gene_reaction_rule == "ng4"  # unmapped tg5 is gone
+
+
+def test_complex_policy_drop_removes_reaction():
+    draft = get_model_from_homology([_template()], _complex_hits(), "bug", complex_policy="drop").model
+    assert all(rxn.id != "R_cplx" for rxn in draft.reactions)
+
+
+# --- strictness alias + bidirectional --------------------------------------
+
+def test_strictness_alias_maps_params():
+    template = _template()
+    hits = make_ortholog_hits([("tg1", "ng1")], "templateGEM", "bug")
+    draft = get_model_from_homology([template], hits, "bug", strictness=3).model  # bidir + best-hits
+    assert any(rxn.id == "R_single" for rxn in draft.reactions)
+
+
+def test_one_directional_non_reciprocal():
+    # Keep only the new->old rows, so no hit is reciprocal.
+    both_ways = make_ortholog_hits([("tg1", "ng1")], "templateGEM", "bug")
+    one_dir = both_ways[both_ways.from_id == "bug"]  # template->new rows removed
+    t = _template()
+    # The bidirectional default maps nothing here; one-directional new_to_old mapping does.
+    assert all(r.id != "R_single" for r in get_model_from_homology([t], one_dir, "bug").model.reactions)
+    out = get_model_from_homology([t], one_dir, "bug", bidirectional=False, map_direction="new_to_old")
+    assert any(r.id == "R_single" for r in out.model.reactions)
+
+
+# --- preferred order -------------------------------------------------------
+
+def test_preferred_order_routes_gene_to_one_model():
+    order = ["modelA", "modelB"]
+    templates = [_template(), _template()]
+    for template, model_id in zip(templates, order, strict=True):
+        template.id = model_id
+    per_model = [make_ortholog_hits([("tg1", "ng1")], model_id, "bug") for model_id in order]
+    hits = pd.concat(per_model, ignore_index=True)
+    res = get_model_from_homology(templates, hits, "bug", preferred_order=list(order))
+    # Both templates can supply ng1's reaction; it must come only from modelA.
+    picked = [r for r in res.model.reactions if r.id.startswith("R_single")]
+    sources = {r.notes.get("homology_source") for r in picked}
+    assert sources == {"modelA"}
diff --git a/tests/test_reconstruction_kegg_download.py b/tests/test_reconstruction_kegg_download.py
new file mode 100644
index 0000000..38d2f44
--- /dev/null
+++ b/tests/test_reconstruction_kegg_download.py
@@ -0,0 +1,125 @@
+"""Tests for the KEGG download/arrange tooling (reconstruction/kegg/download.py).
+
+The network fetch needs a paid KEGG subscription, so it is not exercised here.
+We test credential resolution and the network-free extract/arrange core against
+hand-built fake archives.
+"""
+import gzip
+import io
+import tarfile
+from pathlib import Path
+
+import pytest
+
+from raven_python.reconstruction.kegg.download import (
+    _resolve_auth,
+    extract_kegg_dump,
+)
+
+
+def _make_targz(path: Path, members: dict[str, bytes]) -> None:
+    with tarfile.open(path, mode="w:gz") as archive:  # gzip-compressed tar, one member per dict entry
+        for member_name, payload in members.items():
+            header = tarfile.TarInfo(name=member_name)
+            header.size = len(payload)  # size must be set before the payload is added
+            archive.addfile(header, fileobj=io.BytesIO(payload))
+
+
+def _make_gz(path: Path, data: bytes) -> None:
+    with gzip.open(path, mode="wb") as gz_out:  # binary write: ``data`` is stored gzip-compressed
+        gz_out.write(data)
+
+
+# --------------------------------------------------------------------------- #
+# Credentials
+# --------------------------------------------------------------------------- #
+def test_resolve_auth_explicit_wins():
+    assert _resolve_auth("ftp.kegg.net", auth=("u", "p")) == ("u", "p")  # explicit auth returned as-is
+
+
+def test_resolve_auth_from_netrc(tmp_path):
+    rc = tmp_path / ".netrc"
+    rc.write_text("machine ftp.kegg.net login alice password s3cret\n")
+    rc.chmod(0o600)  # owner read/write only
+    assert _resolve_auth("ftp.kegg.net", netrc_path=rc) == ("alice", "s3cret")
+
+
+def test_resolve_auth_missing_file(tmp_path):
+    with pytest.raises(FileNotFoundError, match="does not exist"):  # "nope" is never created
+        _resolve_auth("ftp.kegg.net", netrc_path=tmp_path / "nope")
+
+
+def test_resolve_auth_host_absent(tmp_path):
+    rc = tmp_path / ".netrc"
+    rc.write_text("machine other.host login a password b\n")  # entry for a different host only
+    rc.chmod(0o600)
+    with pytest.raises(ValueError, match="No credentials for"):
+        _resolve_auth("ftp.kegg.net", netrc_path=rc)
+
+
+# --------------------------------------------------------------------------- #
+# Extract / arrange
+# --------------------------------------------------------------------------- #
+@pytest.fixture
+def fake_dump(tmp_path):
+    """Populate ``tmp_path`` with fake KEGG archives (as a fetch would leave them) and return it."""
+    _make_targz(
+        tmp_path / "reaction.tar.gz",
+        {
+            "reaction/reaction": b"RXN_ENTRIES\n",
+            "reaction/reaction.lst": b"R00010: A <=> B\n",
+            "reaction/reaction_mapformula.lst": b"R00010: 00010: A => B\n",
+            "reaction/reaction.name": b"discard me\n",  # extra file, not lifted
+        },
+    )
+    _make_targz(
+        tmp_path / "compound.tar.gz",
+        {"compound/compound": b"CPD\n", "compound/compound.inchi": b"C00031\tInChI=x\n"},
+    )
+    _make_targz(tmp_path / "glycan.tar.gz", {"glycan/glycan": b"GLY\n"})
+    _make_targz(tmp_path / "ko.tar.gz", {"ko/ko": b"KO\n"})
+    _make_gz(tmp_path / "eukaryotes.pep.gz", b">euk\nMKV\n")
+    _make_gz(tmp_path / "prokaryotes.pep.gz", b">prok\nMAA\n")
+    (tmp_path / "taxonomy").write_text("tax\n")  # taxonomy is a plain, uncompressed file
+    return tmp_path
+
+
+def test_extract_produces_flat_layout(fake_dump):
+    """The result maps exactly the expected flat file names to existing files."""
+    produced = extract_kegg_dump(fake_dump)
+    # Note: "glycan" and "reaction.name" are not among the expected keys.
+    wanted = [
+        "reaction", "reaction.lst", "reaction_mapformula.lst",
+        "compound", "compound.inchi",
+        "ko",
+        "genes.pep",
+        "taxonomy",
+    ]
+    assert set(produced) == set(wanted)
+    for path in produced.values():
+        assert path.is_file()
+
+
+def test_extract_concatenates_compound_and_glycan(fake_dump):
+    extract_kegg_dump(fake_dump)
+    assert (fake_dump / "compound").read_bytes() == b"CPD\nGLY\n"  # compound payload first, then glycan
+
+
+def test_extract_concatenates_proteomes(fake_dump):
+    extract_kegg_dump(fake_dump)
+    assert (fake_dump / "genes.pep").read_bytes() == b">euk\nMKV\n>prok\nMAA\n"  # eukaryotes, then prokaryotes
+
+
+def test_extract_removes_subdirs_and_archives(fake_dump):
+    extract_kegg_dump(fake_dump)
+    leftover_archives = [*fake_dump.glob("*.tar.gz"), *fake_dump.glob("*.gz")]
+    assert leftover_archives == []
+    leftover_dirs = [d for d in ("reaction", "compound", "glycan", "ko") if (fake_dump / d).is_dir()]
+    assert leftover_dirs == []
+    assert not (fake_dump / "reaction.name").exists()  # extra file discarded
+
+
+def test_extract_requires_core_archives(tmp_path):
+    _make_targz(tmp_path / "compound.tar.gz", {"compound/compound": b"CPD\n"})  # the only archive present
+    with pytest.raises(FileNotFoundError, match="required file"):
+        extract_kegg_dump(tmp_path)
diff --git a/tests/test_reconstruction_kegg_hmm.py b/tests/test_reconstruction_kegg_hmm.py
new file mode 100644
index 0000000..3f559ec
--- /dev/null
+++ b/tests/test_reconstruction_kegg_hmm.py
@@ -0,0 +1,326 @@
+"""Tests for KEGG HMM-library construction (taxonomy + hmm, step 3b.3)."""
+from pathlib import Path
+
+import pandas as pd
+import pytest
+
+from raven_python.reconstruction.kegg import (
+    build_ko_fastas,
+    organism_domains,
+    organisms_in_domain,
+    parse_taxonomy,
+)
+from raven_python.reconstruction.kegg import hmm as hmm_mod
+from raven_python.reconstruction.kegg.hmm import (
+    _cdhit_cmd,
+    _cdhit_word_size,
+    _fasta_stats,
+    _hmmbuild_cmd,
+    _mafft_cmd,
+    build_ko_hmm,
+)
+
+DUMP = Path(__file__).parent / "data" / "kegg_dump"
+
+
+@pytest.fixture
+def organism_gene_ko():
+    """Organism/gene/KO table: K01194 has two bsu and two hsa genes, K00002 one eco gene."""
+    rows = [
+        ("bsu", "BSU31050", "K01194"),
+        ("bsu", "BSU31060", "K01194"),
+        ("hsa", "124", "K01194"),
+        ("hsa", "125", "K01194"),
+        ("eco", "b0001", "K00002"),
+    ]
+    # One row per (organism, gene) -> KO assignment.
+    return pd.DataFrame(rows, columns=["organism", "gene", "ko"])
+
+
+# --------------------------------------------------------------------------- #
+# Taxonomy
+# --------------------------------------------------------------------------- #
+def test_parse_taxonomy_lineages():
+    lineages = parse_taxonomy(DUMP / "taxonomy")
+    assert lineages["bsu"] == ["Prokaryotes", "Bacteria", "Firmicutes"]
+    assert lineages["hsa"][0] == "Eukaryotes"
+    assert lineages["eco"][1] == "Bacteria"
+
+
+def test_organism_domains():
+    assert organism_domains(DUMP / "taxonomy") == {  # organism code -> domain name
+        "bsu": "Prokaryotes",
+        "eco": "Prokaryotes",
+        "hsa": "Eukaryotes",
+    }
+
+
+def test_organisms_in_domain_prefix_match():
+    assert organisms_in_domain(DUMP / "taxonomy", "prok") == {"bsu", "eco"}  # lower-case prefix
+    assert organisms_in_domain(DUMP / "taxonomy", "Eukaryotes") == {"hsa"}  # full domain name
+
+
+def test_parse_taxonomy_handles_skipped_depth(tmp_path):
+    """A ``###`` directly under a ``#`` (skipping the ``##`` level) used to corrupt
+    the stack. Now pads with '' placeholders and warns once (known_issues.md C4)."""
+    p = tmp_path / "tax"
+    p.write_text(
+        "#Domain1\n"
+        "###Skipped\n"          # skips ##
+        "T9999\torg1\tan org\n"
+    )
+    with pytest.warns(UserWarning, match="depth skips a level"):
+        cats = parse_taxonomy(p)
+    # Domain still recoverable; the missing level is a placeholder.
+    assert cats["org1"][0] == "Domain1"
+    assert cats["org1"][-1] == "Skipped"
+
+
+# --------------------------------------------------------------------------- #
+# build_ko_fastas (constructMultiFasta)
+# --------------------------------------------------------------------------- #
+def test_build_ko_fastas_groups_by_ko(organism_gene_ko, tmp_path):
+    kos = build_ko_fastas(organism_gene_ko, DUMP / "genes.pep", tmp_path)
+    assert set(kos) == {"K01194", "K00002"}
+    fasta_text = (tmp_path / "K01194.fa").read_text()
+    assert fasta_text.count(">") == 4  # bsu x2 + hsa x2
+    assert ">bsu:BSU31050" in fasta_text
+    assert ">xxx:unused" not in fasta_text  # gene not in any KO is excluded
+
+
+def test_build_ko_fastas_domain_filter(organism_gene_ko, tmp_path):
+    prokaryotes = organisms_in_domain(DUMP / "taxonomy", "prokaryotes")
+    kos = build_ko_fastas(organism_gene_ko, DUMP / "genes.pep", tmp_path, organisms=prokaryotes)
+    k01194_text = (tmp_path / "K01194.fa").read_text()
+    # Prokaryote genes only: K01194 keeps its two bsu entries, K00002 its single eco entry.
+    assert k01194_text.count(">") == 2 and ">hsa:" not in k01194_text
+    assert set(kos) == {"K01194", "K00002"}
+
+
+def test_build_ko_fastas_sequences_intact(organism_gene_ko, tmp_path):
+    build_ko_fastas(organism_gene_ko, DUMP / "genes.pep", tmp_path)
+    k00002_text = (tmp_path / "K00002.fa").read_text()
+    assert k00002_text.startswith(">eco:b0001")  # header is organism:gene
+    assert "MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDA" in k00002_text
+
+
+# --------------------------------------------------------------------------- #
+# Command builders / CD-HIT word size (pure)
+# --------------------------------------------------------------------------- #
+@pytest.mark.parametrize(
+    "identity, expected",
+    [(0.9, "5"), (0.7, "4"), (0.65, "4"), (0.55, "3"), (0.45, "2")],
+)
+def test_cdhit_word_size(identity, expected):
+    assert _cdhit_word_size(identity) == expected  # the word size comes back as a string
+
+
+def test_cdhit_word_size_out_of_range():
+    with pytest.raises(ValueError, match="seq_identity"):  # 0.3 is rejected as out of range
+        _cdhit_word_size(0.3)
+
+
+def test_command_builders():
+    cd = _cdhit_cmd("cd-hit", Path("in.fa"), Path("out.fa"), 0.9, 4)
+    assert cd[:3] == ["cd-hit", "-i", "in.fa"]
+    assert "-c" in cd and "0.9" in cd and "-n" in cd and "5" in cd  # identity 0.9 -> word size "5"
+    # Default is fast progressive (FFT-NS-2), not --auto.
+    assert _mafft_cmd("mafft", Path("in.fa"), 2) == [
+        "mafft", "--retree", "2", "--maxiterate", "0", "--anysymbol", "--thread", "2", "in.fa"
+    ]
+    assert _mafft_cmd("mafft", Path("in.fa"), 2, fast=False)[:2] == ["mafft", "--auto"]
+    assert "--parttree" in _mafft_cmd("mafft", Path("in.fa"), 2, parttree=True)
+    assert _hmmbuild_cmd("hmmbuild", Path("o.hmm"), Path("a.fa"), 3) == [
+        "hmmbuild", "--cpu", "3", "o.hmm", "a.fa"
+    ]
+
+
+# --------------------------------------------------------------------------- #
+# build_ko_hmm orchestration (binaries mocked)
+# --------------------------------------------------------------------------- #
+def test_build_ko_hmm_multi_sequence_runs_full_pipeline(tmp_path, monkeypatch):
+    fasta = tmp_path / "K01194.fa"
+    fasta.write_text(">a\nMKV\n>b\nMRV\n")
+    calls = []  # tool names, in the order the stubbed _run sees them
+
+    monkeypatch.setattr(
+        "raven_python.reconstruction.kegg.hmm.resolve_binary",
+        lambda exe, binary=None: binary or exe,  # stub: hand back the requested name unchanged
+    )
+
+    def fake_run(cmd, *, stdout_path=None):
+        calls.append(Path(cmd[0]).name)
+        # Emulate each tool producing its expected output file.
+        if stdout_path is not None:
+            Path(stdout_path).write_text(">a\nMKV\n>b\nMRV\n")
+        if Path(cmd[0]).name == "cd-hit":
+            Path(cmd[cmd.index("-o") + 1]).write_text(">a\nMKV\n>b\nMRV\n")
+        if Path(cmd[0]).name == "hmmbuild":
+            Path(cmd[-2]).write_text("HMM\n")  # second-to-last argument is the output .hmm
+        return ""
+
+    monkeypatch.setattr("raven_python.reconstruction.kegg.hmm._run", fake_run)
+    out = build_ko_hmm(fasta, tmp_path / "K01194.hmm")
+    assert calls == ["cd-hit", "mafft", "hmmbuild"]
+    assert out.read_text() == "HMM\n"
+
+
+def test_build_ko_hmm_single_sequence_skips_align(tmp_path, monkeypatch):
+    fasta = tmp_path / "K9.fa"
+    fasta.write_text(">only\nMKV\n")
+    calls = []  # tool names, in the order the stubbed _run sees them
+    monkeypatch.setattr(
+        "raven_python.reconstruction.kegg.hmm.resolve_binary",
+        lambda exe, binary=None: binary or exe,  # stub: hand back the requested name unchanged
+    )
+
+    def fake_run(cmd, *, stdout_path=None):
+        calls.append(Path(cmd[0]).name)
+        if Path(cmd[0]).name == "hmmbuild":
+            Path(cmd[-2]).write_text("HMM\n")  # second-to-last argument is the output .hmm
+        return ""
+
+    monkeypatch.setattr("raven_python.reconstruction.kegg.hmm._run", fake_run)
+    build_ko_hmm(fasta, tmp_path / "K9.hmm")
+    assert calls == ["hmmbuild"]  # no cd-hit / mafft for a lone sequence
+
+
+def test_build_ko_hmm_verbose_logs_each_stage(tmp_path, monkeypatch, caplog):
+    fasta = tmp_path / "K01194.fa"
+    fasta.write_text(">a\nMKV\n>b\nMRV\n")
+    monkeypatch.setattr(
+        "raven_python.reconstruction.kegg.hmm.resolve_binary", lambda exe, binary=None: binary or exe
+    )
+
+    def fake_run(cmd, *, stdout_path=None):
+        if stdout_path is not None:
+            Path(stdout_path).write_text(">a\nMKV\n>b\nMRV\n")
+        if Path(cmd[0]).name == "cd-hit":
+            Path(cmd[cmd.index("-o") + 1]).write_text(">a\nMKV\n>b\nMRV\n")
+        if Path(cmd[0]).name == "hmmbuild":
+            Path(cmd[-2]).write_text("HMM\n")  # second-to-last argument is the output .hmm
+        return ""
+
+    monkeypatch.setattr("raven_python.reconstruction.kegg.hmm._run", fake_run)
+    with caplog.at_level("INFO", logger="raven_python.reconstruction.kegg.hmm"):
+        build_ko_hmm(fasta, tmp_path / "K01194.hmm", verbose=True)
+    text = caplog.text
+    # Every stage logs a line prefixed with the KO id (here the FASTA stem, K01194).
+    assert "[K01194] start: 2 sequences" in text
+    assert "[K01194] CD-HIT" in text
+    assert "[K01194] MAFFT" in text
+    assert "[K01194] hmmbuild: done in" in text
+    # Each stage is a single line: the tool/params and the timing together, not split.
+    assert "running" not in text
+    assert "[K01194] complete" in text
+
+
+def test_build_ko_hmm_quiet_by_default(tmp_path, monkeypatch, caplog):
+    fasta = tmp_path / "K9.fa"
+    fasta.write_text(">only\nMKV\n")
+    monkeypatch.setattr(
+        "raven_python.reconstruction.kegg.hmm.resolve_binary", lambda exe, binary=None: binary or exe
+    )
+    monkeypatch.setattr(  # write_text() returns a non-zero count, so ``... and ""`` evaluates to ""
+        "raven_python.reconstruction.kegg.hmm._run",
+        lambda cmd, *, stdout_path=None: Path(cmd[-2]).write_text("HMM\n") and "",
+    )
+    with caplog.at_level("INFO", logger="raven_python.reconstruction.kegg.hmm"):
+        build_ko_hmm(fasta, tmp_path / "K9.hmm")  # verbose defaults False
+    assert caplog.text == ""  # nothing logged at INFO or above
+
+
+def test_fasta_stats_counts_residues(tmp_path):
+    sample = tmp_path / "x.fa"
+    sample.write_text(">a\nMKVL\nAAG\n>b\nMR\n")  # record a spans two lines (4 + 3 = 7), b has 2
+    assert _fasta_stats(sample) == (2, 9)
+
+
+def test_auto_cost_budget_scales_with_memory(monkeypatch):
+    hmm_mod._auto_cost_budget.cache_clear()
+    monkeypatch.setattr(hmm_mod, "_total_memory_bytes", lambda: 64 * 1024**3)
+    big = hmm_mod._auto_cost_budget()
+    hmm_mod._auto_cost_budget.cache_clear()  # cached result would otherwise hide the new stub
+    monkeypatch.setattr(hmm_mod, "_total_memory_bytes", lambda: 8 * 1024**3)
+    small = hmm_mod._auto_cost_budget()
+    hmm_mod._auto_cost_budget.cache_clear()  # reset before asserting: a failure must not leak the stubbed budget
+    assert big > small > 0  # more RAM -> larger DP-cost budget
+
+
+def test_auto_cost_budget_warns_on_low_memory(monkeypatch, caplog):
+    hmm_mod._auto_cost_budget.cache_clear()
+    monkeypatch.setattr(hmm_mod, "_total_memory_bytes", lambda: 7 * 1024**3)
+    with caplog.at_level("WARNING", logger="raven_python.reconstruction.kegg.hmm"):
+        hmm_mod._auto_cost_budget()
+    hmm_mod._auto_cost_budget.cache_clear()  # reset before asserting: a failure must not leak the stubbed budget
+    assert "Limited memory" in caplog.text
+
+
+def test_auto_cost_budget_falls_back_without_detection(monkeypatch, caplog):
+    hmm_mod._auto_cost_budget.cache_clear()  # start from an empty cache so the stub below is used
+    monkeypatch.setattr(hmm_mod, "_total_memory_bytes", lambda: None)  # simulate failed detection
+    with caplog.at_level("WARNING", logger="raven_python.reconstruction.kegg.hmm"):
+        assert hmm_mod._auto_cost_budget() == hmm_mod._DEFAULT_COST_BUDGET
+    assert "Could not detect system memory" in caplog.text
+    hmm_mod._auto_cost_budget.cache_clear()  # do not leave the stub-derived value cached
+
+
+def test_long_proteins_route_to_parttree(monkeypatch, tmp_path):
+    # Few but very long sequences (K12047-like): low residue count, high DP cost,
+    # so the length-aware budget must pick PartTree (a residue-only rule would not).
+    fasta = tmp_path / "K12047.fa"
+    fasta.write_text("".join(f">g{i}\n{'M' * 2000}\n" for i in range(300)))  # 300 x 2000 aa
+    monkeypatch.setattr(hmm_mod, "resolve_binary", lambda exe, binary=None: binary or exe)
+    hmm_mod._auto_cost_budget.cache_clear()
+    monkeypatch.setattr(hmm_mod, "_total_memory_bytes", lambda: 8 * 1024**3)
+    seen = {}  # records whether the mafft command carried --parttree
+
+    def fake_run(cmd, *, stdout_path=None):
+        name = Path(cmd[0]).name
+        if name == "cd-hit":
+            Path(cmd[cmd.index("-o") + 1]).write_text(fasta.read_text())  # pass all sequences through
+        if name == "mafft":
+            seen["parttree"] = "--parttree" in cmd
+            Path(stdout_path).write_text(fasta.read_text())
+        if name == "hmmbuild":
+            Path(cmd[-2]).write_text("HMM\n")
+        return ""
+
+    monkeypatch.setattr(hmm_mod, "_run", fake_run)
+    build_ko_hmm(fasta, tmp_path / "K12047.hmm")
+    hmm_mod._auto_cost_budget.cache_clear()  # cleared before the assert so a failure leaves no stale budget
+    # 300x2000 = 600k residues (a residue rule with a ~1M cutoff would NOT trigger),
+    # but DP cost 1.2e9 exceeds the 8 GB budget -> PartTree.
+    assert seen["parttree"] is True
+
+
+def test_parttree_residues_param_overrides_auto(tmp_path, monkeypatch):
+    # The explicit parttree_residues argument decides the MAFFT method (residues only).
+    fasta = tmp_path / "K.fa"
+    fasta.write_text("".join(f">g{i}\n{'M' * 1000}\n" for i in range(5)))  # 5000 residues
+    monkeypatch.setattr(hmm_mod, "resolve_binary", lambda exe, binary=None: binary or exe)
+    seen = {}  # overwritten by each build_ko_hmm call below
+
+    def fake_run(cmd, *, stdout_path=None):
+        name = Path(cmd[0]).name
+        if name == "cd-hit":
+            Path(cmd[cmd.index("-o") + 1]).write_text(fasta.read_text())  # pass all sequences through
+        if name == "mafft":
+            seen["parttree"] = "--parttree" in cmd
+            Path(stdout_path).write_text(fasta.read_text())
+        if name == "hmmbuild":
+            Path(cmd[-2]).write_text("HMM\n")
+        return ""
+
+    monkeypatch.setattr(hmm_mod, "_run", fake_run)
+    build_ko_hmm(fasta, tmp_path / "a.hmm", parttree_residues=10_000)  # 5000 < 10000
+    assert seen["parttree"] is False  # stays on FFT-NS-2
+    build_ko_hmm(fasta, tmp_path / "b.hmm", parttree_residues=4000)  # 5000 > 4000
+    assert seen["parttree"] is True  # switches to PartTree
+
+
+def test_build_ko_hmm_empty_fasta_raises(tmp_path):
+    empty = tmp_path / "empty.fa"
+    empty.write_text("")  # zero records
+    with pytest.raises(ValueError, match="no sequences"):
+        build_ko_hmm(empty, tmp_path / "empty.hmm")
diff --git a/tests/test_reconstruction_kegg_organism.py b/tests/test_reconstruction_kegg_organism.py
new file mode 100644
index 0000000..f64f15b
--- /dev/null
+++ b/tests/test_reconstruction_kegg_organism.py
@@ -0,0 +1,179 @@
+"""Tests for get_kegg_model_for_organism (KEGG organism-ID mode, step 3b.4)."""
+from pathlib import Path
+
+import cobra
+import pandas as pd
+import pytest
+
+from raven_python.reconstruction.kegg import (
+    build_kegg_tables,
+    build_reference_model,
+    get_kegg_model_for_organism,
+    get_kegg_model_for_organism_from_artefacts,
+    parse_kegg_compounds,
+    parse_kegg_dump,
+    parse_kegg_reactions,
+)
+
+DUMP = Path(__file__).parent / "data" / "kegg_dump"
+
+
+@pytest.fixture(scope="module")
+def artefacts():
+    reactions = parse_kegg_reactions(DUMP)
+    compounds = parse_kegg_compounds(DUMP)
+    linked = {ko for r in reactions for ko in r.kos}
+    from raven_python.reconstruction.kegg import parse_kegg_kos
+
+    kos = parse_kegg_kos(DUMP, keep=linked)
+    model = build_reference_model(reactions, compounds)
+    tables = build_kegg_tables(reactions, kos)
+    return model, tables
+
+
+def _build(artefacts, organism_id, **kw):
+    model, tables = artefacts
+    return get_kegg_model_for_organism(
+        organism_id,
+        model,
+        tables["ko_reaction"],
+        tables["organism_gene_ko"],
+        rxn_flags=tables["rxn_flags"],
+        **kw,
+    )
+
+
+# --------------------------------------------------------------------------- #
+# Core behaviour
+# --------------------------------------------------------------------------- #
+def test_eco_keeps_only_its_reactions(artefacts):
+    # eco has b0001 -> K00002 -> R00100 only.
+    model = _build(artefacts, "eco")
+    assert {r.id for r in model.reactions} == {"R00100"}
+    assert model.id == "eco"
+
+
+def test_eco_gpr_and_gene_annotation(artefacts):
+    model = _build(artefacts, "eco")
+    r = model.reactions.get_by_id("R00100")
+    assert r.gene_reaction_rule == "b0001"
+    assert model.genes.get_by_id("b0001").annotation["kegg.genes"] == "eco:b0001"
+    assert r.notes["note"].startswith("Included by get_kegg_model_for_organism")
+
+
+def test_bsu_or_joins_multiple_genes(artefacts):
+    # bsu has BSU31050 + BSU31060, both -> K01194 -> R00010.
+    model = _build(artefacts, "bsu")
+    r = model.reactions.get_by_id("R00010")
+    assert set(r.genes) == {model.genes.get_by_id("BSU31050"), model.genes.get_by_id("BSU31060")}
+    assert r.gene_reaction_rule == "BSU31050 or BSU31060"
+
+
+def test_case_insensitive_organism(artefacts):
+    assert "R00010" in _build(artefacts, "BSU").reactions
+
+
+def test_orphan_metabolites_pruned(artefacts):
+    # eco keeps only R00100 (C00002, C00003); trehalose/glucose mets should go.
+    model = _build(artefacts, "eco")
+    assert {m.id for m in model.metabolites} == {"C00002", "C00003"}
+
+
+def test_reference_model_unmodified(artefacts):
+    reference, _ = artefacts
+    before = len(reference.reactions)
+    _build(artefacts, "eco")
+    assert len(reference.reactions) == before  # worked on a copy
+    assert len(reference.genes) == 0
+
+
+# --------------------------------------------------------------------------- #
+# Spontaneous handling
+# --------------------------------------------------------------------------- #
+def test_spontaneous_reaction_kept_without_genes(artefacts):
+    # R00100 is spontaneous; for bsu it has no genes but is kept (no GPR).
+    model = _build(artefacts, "bsu", keep_spontaneous=True)
+    assert "R00100" in model.reactions
+    assert model.reactions.get_by_id("R00100").gene_reaction_rule == ""
+
+
+def test_spontaneous_dropped_when_disabled(artefacts):
+    model = _build(artefacts, "bsu", keep_spontaneous=False)
+    assert "R00100" not in model.reactions
+    assert "R00010" in model.reactions  # the gene-backed reaction stays
+
+
+# --------------------------------------------------------------------------- #
+# Quality filters take precedence over having genes
+# --------------------------------------------------------------------------- #
+def _tiny_general_case():
+    ref = cobra.Model("KEGG")
+    a = cobra.Metabolite("C1", compartment="s")
+    b = cobra.Metabolite("C2", compartment="s")
+    ref.add_metabolites([a, b])
+    rxn = cobra.Reaction("R1")
+    ref.add_reactions([rxn])
+    rxn.add_metabolites({a: -1, b: 1})
+    ko_reaction = pd.DataFrame([("K1", "R1")], columns=["ko", "reaction"])
+    ogk = pd.DataFrame([("xyz", "g1", "K1")], columns=["organism", "gene", "ko"])
+    flags = pd.DataFrame(
+        [("R1", False, False, False, True)],
+        columns=["reaction", "spontaneous", "undefined_stoich", "incomplete", "general"],
+    )
+    return ref, ko_reaction, ogk, flags
+
+
+def test_general_filter_drops_reaction_with_genes():
+    ref, ko_reaction, ogk, flags = _tiny_general_case()
+    model = get_kegg_model_for_organism("xyz", ref, ko_reaction, ogk, rxn_flags=flags)
+    assert "R1" not in model.reactions  # general + keep_general=False (default)
+
+
+def test_general_kept_when_enabled():
+    ref, ko_reaction, ogk, flags = _tiny_general_case()
+    model = get_kegg_model_for_organism(
+        "xyz", ref, ko_reaction, ogk, rxn_flags=flags, keep_general=True
+    )
+    assert model.reactions.get_by_id("R1").gene_reaction_rule == "g1"
+
+
+# --------------------------------------------------------------------------- #
+# Validation + artefact loading
+# --------------------------------------------------------------------------- #
+def test_unknown_organism_raises(artefacts):
+    with pytest.raises(ValueError, match="no genes"):
+        _build(artefacts, "zzz")
+
+
+def test_domain_mode_needs_taxonomy(artefacts):
+    with pytest.raises(ValueError, match="taxonomy"):
+        _build(artefacts, "eukaryotes")
+
+
+def test_domain_mode_keeps_all_domain_organisms(artefacts):
+    # Prokaryotes (bsu + eco) -> R00010 (bsu genes) and R00100 (eco gene).
+    model = _build(artefacts, "prokaryotes", taxonomy=DUMP / "taxonomy")
+    assert "R00010" in model.reactions
+    assert "R00100" in model.reactions
+    # Genes are organism-qualified in domain mode to stay distinct.
+    assert {g.id for g in model.reactions.get_by_id("R00010").genes} == {
+        "bsu:BSU31050",
+        "bsu:BSU31060",
+    }
+
+
+def test_domain_mode_eukaryotes(artefacts):
+    # Eukaryotes (hsa) -> R00010 via hsa:124/125.  The eco-only R00100 has no genes
+    # here; it is spontaneous, so presumably kept without a GPR (not asserted below).
+    model = _build(artefacts, "eukaryotes", taxonomy=DUMP / "taxonomy")
+    assert {g.id for g in model.reactions.get_by_id("R00010").genes} == {
+        "hsa:124",
+        "hsa:125",
+    }
+
+
+def test_from_artefacts_roundtrip(tmp_path):
+    parse_kegg_dump(DUMP, tmp_path)
+    model = get_kegg_model_for_organism_from_artefacts("eco", tmp_path)
+    assert {r.id for r in model.reactions} == {"R00100"}
+    assert model.reactions.get_by_id("R00100").gene_reaction_rule == "b0001"
diff --git a/tests/test_reconstruction_kegg_parse.py b/tests/test_reconstruction_kegg_parse.py
new file mode 100644
index 0000000..23d8f71
--- /dev/null
+++ b/tests/test_reconstruction_kegg_parse.py
@@ -0,0 +1,220 @@
+"""Tests for the KEGG dump parser (reconstruction/kegg/parse.py, step 3b.2)."""
+from pathlib import Path
+
+import pytest
+
+from raven_python.reconstruction.kegg import (
+    build_kegg_tables,
+    build_reference_model,
+    parse_kegg_compounds,
+    parse_kegg_dump,
+    parse_kegg_kos,
+    parse_kegg_reactions,
+    read_kegg_table,
+    write_kegg_tables,
+)
+
+DUMP = Path(__file__).parent / "data" / "kegg_dump"
+
+
+@pytest.fixture(scope="module")
+def reactions():
+    return parse_kegg_reactions(DUMP)
+
+
+@pytest.fixture(scope="module")
+def compounds():
+    return parse_kegg_compounds(DUMP)
+
+
+@pytest.fixture(scope="module")
+def kos(reactions):  # reuses the module-scoped parse instead of re-reading DUMP
+    linked = {ko for r in reactions for ko in r.kos}  # KO ids referenced by a reaction
+    return parse_kegg_kos(DUMP, keep=linked)
+
+
+# --------------------------------------------------------------------------- #
+# Reactions
+# --------------------------------------------------------------------------- #
+def test_reactions_parsed(reactions):
+    assert {r.id for r in reactions} == {"R00010", "R00100", "R00200", "R00300", "R00400"}
+
+
+def test_reaction_fields(reactions):
+    r = next(r for r in reactions if r.id == "R00010")
+    assert r.name == "alpha,alpha-trehalose glucohydrolase"
+    assert r.eccodes == ["3.2.1.28"]
+    assert r.kos == ["K01194"]
+    # rn01100 is an overview map and must be skipped.
+    assert r.pathways == ["rn00500"]
+
+
+def test_stoichiometry_cached(reactions):
+    """parse_kegg_reactions populates the cached stoichiometry so
+    build_reference_model doesn't have to re-parse (known_issues.md D2)."""
+    r = next(r for r in reactions if r.id == "R00010")
+    assert r.stoichiometry  # non-empty
+    # Reactants negative, products positive.
+    assert all(c != 0 for c in r.stoichiometry.values())
+    assert any(c < 0 for c in r.stoichiometry.values())
+    assert any(c > 0 for c in r.stoichiometry.values())
+
+
+def test_spontaneous_flag(reactions):
+    assert next(r for r in reactions if r.id == "R00100").spontaneous
+    assert not next(r for r in reactions if r.id == "R00010").spontaneous
+
+
+def test_general_flag(reactions):
+    assert next(r for r in reactions if r.id == "R00300").general
+
+
+def test_undefined_stoich_flag(reactions):
+    assert next(r for r in reactions if r.id == "R00200").undefined_stoich
+    assert not next(r for r in reactions if r.id == "R00010").undefined_stoich
+
+
+def test_mapformula_makes_irreversible(reactions):
+    # R00100 is drawn one direction in its only map -> irreversible.
+    assert not next(r for r in reactions if r.id == "R00100").reversible
+    # R00010 is drawn in conflicting directions across maps -> stays reversible.
+    assert next(r for r in reactions if r.id == "R00010").reversible
+
+
+# --------------------------------------------------------------------------- #
+# Compounds
+# --------------------------------------------------------------------------- #
+def test_compound_first_name_only(compounds):
+    water = next(c for c in compounds if c.id == "C00001")
+    assert water.name == "H2O"
+    assert water.chebi == ["CHEBI:15377"]
+    assert water.pubchem == ["3303"]
+
+
+def test_inchi_overrides_formula(compounds):
+    glucose = next(c for c in compounds if c.id == "C00031")
+    assert glucose.inchi.startswith("InChI=")
+    assert glucose.formula == ""  # cleared when an InChI is available
+    assert glucose.chebi == ["CHEBI:4167", "CHEBI:17634"]
+
+
+# --------------------------------------------------------------------------- #
+# KOs / genes
+# --------------------------------------------------------------------------- #
+def test_kos_limited_to_keep(kos):
+    # K99999 is unlinked (excluded by keep); K09999 is referenced but absent.
+    assert {ko.id for ko in kos} == {"K01194", "K00002"}
+
+
+def test_ko_genes_lowercased_and_stripped(kos):
+    k = next(ko for ko in kos if ko.id == "K01194")
+    assert k.name == "alpha,alpha-trehalase [EC:3.2.1.28]"
+    assert ("bsu", "BSU31050") in k.genes  # '(gbsB)' suffix stripped, org lowercased
+    assert ("hsa", "125") in k.genes
+
+
+# --------------------------------------------------------------------------- #
+# Reference model
+# --------------------------------------------------------------------------- #
+def test_reference_model_is_gene_free(reactions, compounds):
+    model = build_reference_model(reactions, compounds)
+    assert len(model.genes) == 0
+    for rxn in model.reactions:
+        assert rxn.gene_reaction_rule == ""
+
+
+def test_empty_reaction_dropped(reactions, compounds):
+    model = build_reference_model(reactions, compounds)
+    assert "R00400" not in model.reactions  # C00007 <=> C00007 cancels out
+    assert "C00007" not in model.metabolites  # and its only metabolite is unused
+
+
+def test_reaction_bounds_follow_reversibility(reactions, compounds):
+    model = build_reference_model(reactions, compounds)
+    assert model.reactions.get_by_id("R00010").bounds == (-1000.0, 1000.0)
+    assert model.reactions.get_by_id("R00100").bounds == (0.0, 1000.0)
+
+
+def test_reaction_stoichiometry_and_annotation(reactions, compounds):
+    model = build_reference_model(reactions, compounds)
+    r = model.reactions.get_by_id("R00010")
+    coefs = {m.id: c for m, c in r.metabolites.items()}
+    assert coefs == {"C01083": -1.0, "C00001": -1.0, "C00031": 2.0}
+    assert r.annotation["kegg.orthology"] == ["K01194"]
+    assert r.annotation["ec-code"] == ["3.2.1.28"]
+
+
+def test_metabolite_annotation(reactions, compounds):
+    model = build_reference_model(reactions, compounds)
+    glucose = model.metabolites.get_by_id("C00031")
+    assert glucose.name == "D-Glucose"
+    assert glucose.annotation["inchi"].startswith("InChI=")
+
+
+# --------------------------------------------------------------------------- #
+# Tables
+# --------------------------------------------------------------------------- #
+def test_ko_reaction_table(reactions, kos):
+    tables = build_kegg_tables(reactions, kos)
+    pairs = set(map(tuple, tables["ko_reaction"].to_numpy()))
+    assert ("K01194", "R00010") in pairs
+    assert ("K09999", "R00300") in pairs  # kept even though KO entry is missing
+
+
+def test_organism_gene_ko_table(reactions, kos):
+    tables = build_kegg_tables(reactions, kos)
+    rows = set(map(tuple, tables["organism_gene_ko"].to_numpy()))
+    assert ("bsu", "BSU31050", "K01194") in rows
+    assert ("eco", "b0001", "K00002") in rows
+    assert len(rows) == 5
+
+
+def test_rxn_flags_table(reactions, kos):
+    tables = build_kegg_tables(reactions, kos)
+    flags = tables["rxn_flags"].set_index("reaction")
+    assert bool(flags.loc["R00100", "spontaneous"])
+    assert bool(flags.loc["R00200", "undefined_stoich"])
+    assert bool(flags.loc["R00300", "general"])
+    assert not bool(flags.loc["R00010", "spontaneous"])
+
+
+# --------------------------------------------------------------------------- #
+# Round-trip + orchestrator
+# --------------------------------------------------------------------------- #
+def test_tables_roundtrip_gzipped_tsv(reactions, kos, tmp_path):
+    tables = build_kegg_tables(reactions, kos)
+    paths = write_kegg_tables(tables, tmp_path)
+    assert all(p.name.endswith(".tsv.gz") for p in paths)
+    back = read_kegg_table(tmp_path / "ko_reaction.tsv.gz")
+    assert set(map(tuple, back.to_numpy())) == set(map(tuple, tables["ko_reaction"].to_numpy()))
+
+
+def test_parse_kegg_dump_writes_artefacts(tmp_path):
+    paths = parse_kegg_dump(DUMP, tmp_path)
+    assert set(paths) >= {
+        "ko_reaction", "ko_names", "organism_gene_ko", "rxn_flags", "reference_model"
+    }
+    assert (tmp_path / "reference_model.yml.gz").is_file()
+    # organism_gene_ko is streamed to a sorted, xz-compressed TSV.
+    assert paths["organism_gene_ko"].name == "organism_gene_ko.tsv.xz"
+    ogk = read_kegg_table(paths["organism_gene_ko"])
+    assert set(ogk.columns) == {"organism", "gene", "ko"}
+    assert ("eco", "b0001", "K00002") in set(map(tuple, ogk.to_numpy()))
+    # Rows are sorted by (organism, gene) — the property that makes them compress.
+    keys = list(zip(ogk["organism"], ogk["gene"], strict=True))
+    assert keys == sorted(keys)
+
+
+def test_stream_organism_gene_ko_external_merge(tmp_path):
+    """A tiny chunk_rows forces multiple sorted runs to be merged; output stays sorted."""
+    from raven_python.reconstruction.kegg.parse import stream_organism_gene_ko
+
+    out = tmp_path / "organism_gene_ko.tsv.xz"
+    keep = {ko.id for ko in parse_kegg_kos(DUMP)}
+    names = stream_organism_gene_ko(DUMP, keep, out, chunk_rows=1)
+    assert out.is_file() and not list(tmp_path.glob("ogk_sort_*"))  # temp dir cleaned up
+    ogk = read_kegg_table(out)
+    keys = list(zip(ogk["organism"], ogk["gene"], strict=True))
+    assert keys == sorted(keys)
+    assert ("eco", "b0001", "K00002") in set(map(tuple, ogk.to_numpy()))
+    assert set(names.columns) == {"ko", "name"}
diff --git a/tests/test_reconstruction_kegg_query.py b/tests/test_reconstruction_kegg_query.py
new file mode 100644
index 0000000..49aae60
--- /dev/null
+++ b/tests/test_reconstruction_kegg_query.py
@@ -0,0 +1,132 @@
+"""Tests for the KEGG HMM-query path (reconstruction/kegg/query.py, step 3b.5)."""
+from pathlib import Path
+
+import pandas as pd
+import pytest
+
+from raven_python.reconstruction.kegg import (
+    assign_kos,
+    build_kegg_tables,
+    build_reference_model,
+    get_kegg_model_from_sequences,
+    parse_hmmscan_tblout,
+    parse_kegg_compounds,
+    parse_kegg_kos,
+    parse_kegg_reactions,
+)
+
+DUMP = Path(__file__).parent / "data" / "kegg_dump"
+
+# A minimal hmmscan --tblout excerpt: target(KO) accession query(gene) ... evalue ...
+TBLOUT = """\
+#                                                               --- full sequence ----
+# target name        accession  query name  accession   E-value  score  bias
+#------------------- ---------- ----------- ---------- --------- ------ -----
+K01194               -          gene1       -          1e-120     400.0   0.0
+K01194               -          gene2       -          1e-100     350.0   0.0
+K00002               -          gene1       -          1e-10      40.0    0.0
+"""
+
+
+# --------------------------------------------------------------------------- #
+# Parsing
+# --------------------------------------------------------------------------- #
+def test_parse_tblout_skips_comments():
+    hits = parse_hmmscan_tblout(TBLOUT)
+    assert list(hits.columns) == ["ko", "gene", "evalue"]
+    assert len(hits) == 3
+    assert set(hits["ko"]) == {"K01194", "K00002"}
+    assert hits.iloc[0]["evalue"] == 1e-120
+
+
+def test_parse_tblout_empty():
+    assert parse_hmmscan_tblout("# only a header\n").empty
+
+
+# --------------------------------------------------------------------------- #
+# assign_kos scoring/filters
+# --------------------------------------------------------------------------- #
+def test_cutoff_excludes_weak_hits():
+    hits = parse_hmmscan_tblout(TBLOUT)
+    # gene1->K00002 has evalue 1e-10, above the default cutoff 1e-30: dropped.
+    assigned = assign_kos(hits)
+    assert "K00002" not in assigned
+    assert set(assigned["K01194"]) == {"gene1", "gene2"}
+
+
+def test_loose_cutoff_keeps_hit():
+    hits = parse_hmmscan_tblout(TBLOUT)
+    assigned = assign_kos(hits, cutoff=1e-5, min_score_ratio_g=0.0, min_score_ratio_ko=0.0)
+    assert assigned.get("K00002") == ["gene1"]
+
+
+def test_min_score_ratio_ko_prunes_weak_member():
+    # In one KO: best 1e-200, weak 1e-20. log(1e-20)/log(1e-200)=0.1 < 0.3 -> pruned.
+    hits = pd.DataFrame(
+        [("K1", "strong", 1e-200), ("K1", "weak", 1e-20)],
+        columns=["ko", "gene", "evalue"],
+    )
+    assigned = assign_kos(hits, cutoff=1e-5, min_score_ratio_ko=0.3, min_score_ratio_g=0.0)
+    assert assigned["K1"] == ["strong"]
+
+
+def test_min_score_ratio_g_keeps_gene_in_best_ko_only():
+    # gene g hits K1 strongly (1e-200) and K2 weakly (1e-20).
+    # For the gene: log(1e-20)/log(1e-200)=0.1 < 0.8 -> K2 assignment dropped.
+    hits = pd.DataFrame(
+        [("K1", "g", 1e-200), ("K2", "g", 1e-20)],
+        columns=["ko", "gene", "evalue"],
+    )
+    assigned = assign_kos(hits, cutoff=1e-5, min_score_ratio_ko=0.0, min_score_ratio_g=0.8)
+    assert assigned == {"K1": ["g"]}
+
+
+def test_zero_evalue_does_not_crash():
+    hits = pd.DataFrame([("K1", "g", 0.0)], columns=["ko", "gene", "evalue"])
+    assert assign_kos(hits) == {"K1": ["g"]}
+
+
+def test_cutoff_ge_one_rejected():
+    """cutoff >= 1 would let log(best_evalue)=0 through and ZeroDivisionError later
+    (known_issues.md A6). Reject up front with a clear message."""
+    hits = pd.DataFrame([("K1", "g", 0.5)], columns=["ko", "gene", "evalue"])
+    with pytest.raises(ValueError, match="cutoff must be < 1"):
+        assign_kos(hits, cutoff=1.0)
+
+
+# --------------------------------------------------------------------------- #
+# Model assembly via the HMM path (hmmscan mocked)
+# --------------------------------------------------------------------------- #
+@pytest.fixture(scope="module")
+def reference_and_tables():
+    reactions = parse_kegg_reactions(DUMP)
+    compounds = parse_kegg_compounds(DUMP)
+    linked = {ko for r in reactions for ko in r.kos}
+    kos = parse_kegg_kos(DUMP, keep=linked)
+    return build_reference_model(reactions, compounds), build_kegg_tables(reactions, kos)
+
+
+def test_get_model_from_sequences(reference_and_tables, monkeypatch):
+    model_ref, tables = reference_and_tables
+    # Mock the HMM search: K01194 -> myGeneA/myGeneB (-> R00010).
+    monkeypatch.setattr(
+        "raven_python.reconstruction.kegg.query.run_hmmscan",
+        lambda *a, **k: (
+            "K01194 - myGeneA - 1e-120 400 0\n"
+            "K01194 - myGeneB - 1e-110 380 0\n"
+        ),
+    )
+    model = get_kegg_model_from_sequences(
+        "ignored.fasta",
+        model_ref,
+        tables["ko_reaction"],
+        "ignored.hmm",
+        rxn_flags=tables["rxn_flags"],
+        model_id="myorg",
+    )
+    assert model.id == "myorg"
+    r = model.reactions.get_by_id("R00010")
+    assert set(r.gene_reaction_rule.split(" or ")) == {"myGeneA", "myGeneB"}
+    assert r.notes["note"].endswith("(using HMMs)")
+    # R00200/R00300 had no matched KOs and are not spontaneous -> absent.
+    assert "R00200" not in model.reactions
diff --git a/tests/test_scripts_registry.py b/tests/test_scripts_registry.py
new file mode 100644
index 0000000..c9c03cf
--- /dev/null
+++ b/tests/test_scripts_registry.py
@@ -0,0 +1,58 @@
+"""Tests for scripts/make_registry_snippet.py registry-entry helpers."""
+import hashlib
+import importlib.util
+import json
+from pathlib import Path
+
+import pytest
+
+# scripts/ is not a package; load the module directly by path.
+_SCRIPT = Path(__file__).resolve().parents[1] / "scripts" / "make_registry_snippet.py"
+_spec = importlib.util.spec_from_file_location("make_registry_snippet", _SCRIPT)
+mrs = importlib.util.module_from_spec(_spec)
+_spec.loader.exec_module(mrs)
+
+
+def _sha(data: bytes) -> str:
+    return hashlib.sha256(data).hexdigest()
+
+
+def test_data_entry_lists_files_with_urls_and_checksums(tmp_path):
+    (tmp_path / "reference_model.yml.gz").write_bytes(b"model")
+    (tmp_path / "ko_reaction.tsv.gz").write_bytes(b"table")
+    (tmp_path / ".hidden").write_bytes(b"skip")  # hidden files ignored
+
+    entry = mrs.data_entry("kegg", "kegg116", "https://x/rel/", tmp_path)
+    assert entry["version"] == "kegg116"
+    assert set(entry["files"]) == {"reference_model.yml.gz", "ko_reaction.tsv.gz"}
+    ref = entry["files"]["reference_model.yml.gz"]
+    assert ref["url"] == "https://x/rel/reference_model.yml.gz"  # trailing slash collapsed
+    assert ref["sha256"] == _sha(b"model")
+
+
+def test_data_entry_empty_dir_errors(tmp_path):
+    with pytest.raises(SystemExit):
+        mrs.data_entry("kegg", "v1", "https://x", tmp_path)
+
+
+def test_binary_entry_parses_platform_from_filename(tmp_path):
+    (tmp_path / "blast-2.16.0-linux-x86_64.zip").write_bytes(b"linux")
+    (tmp_path / "blast-2.16.0-macos-arm64.zip").write_bytes(b"mac")
+    (tmp_path / "other-1.0-linux-x86_64.zip").write_bytes(b"nope")  # different bundle
+
+    entry = mrs.binary_entry("blast", "2.16.0", ["blastp", "makeblastdb"], "https://x", tmp_path)
+    assert entry["provides"] == ["blastp", "makeblastdb"]
+    assert set(entry["platforms"]) == {"linux-x86_64", "macos-arm64"}
+    assert entry["platforms"]["macos-arm64"]["sha256"] == _sha(b"mac")
+    assert entry["platforms"]["linux-x86_64"]["url"].endswith("blast-2.16.0-linux-x86_64.zip")
+
+
+def test_binary_entry_no_zips_errors(tmp_path):
+    with pytest.raises(SystemExit):
+        mrs.binary_entry("blast", "2.16.0", ["blastp"], "https://x", tmp_path)
+
+
+def test_render_is_valid_json_round_trip():
+    entry = {"version": "v1", "files": {"a": {"url": "u", "sha256": "s"}}}
+    text = mrs.render("kegg", entry)
+    assert json.loads(text) == {"kegg": entry}
diff --git a/tests/test_tasks.py b/tests/test_tasks.py
new file mode 100644
index 0000000..1b5e6bd
--- /dev/null
+++ b/tests/test_tasks.py
@@ -0,0 +1,189 @@
+"""Tests for metabolic tasks (Phase 4a): parse_task_list + check_tasks."""
+import cobra
+import pytest
+
+from raven_python.tasks import Task, check_tasks, parse_task_list
+
+TASK_TSV = (
+    "ID\tDESCRIPTION\tIN\tIN UB\tOUT\tOUT LB\tEQU\tSHOULD FAIL\n"
+    "T1\tgrowth\tglc[e];o2[e]\t10\tbio[c]\t1\t\t\n"
+    "T2\tinfeasible\t\t\tatp[c]\t1\t\ttrue\n"
+    "\t\t\t\tnadh[c]\t\t\t\n"
+    "T3\twithequ\tA[c]\t\tB[c]\t\tA[c] <=> B[c]\t\n"
+)
+
+
+# --------------------------------------------------------------------------- #
+# parse_task_list
+# --------------------------------------------------------------------------- #
+@pytest.fixture
+def task_file(tmp_path):
+    p = tmp_path / "tasks.txt"
+    p.write_text(TASK_TSV)
+    return p
+
+
+def test_parse_basic_and_defaults(task_file):
+    tasks = parse_task_list(task_file)
+    assert [t.id for t in tasks] == ["T1", "T2", "T3"]
+    t1 = tasks[0]
+    assert t1.description == "growth"
+    # ';' splits mets sharing the row's bounds; IN LB defaults 0, IN UB from cell.
+    assert t1.inputs == [("glc[e]", 0.0, 10.0), ("o2[e]", 0.0, 10.0)]
+    assert t1.outputs == [("bio[c]", 1.0, 1000.0)]  # OUT UB defaults 1000
+
+
+def test_parse_should_fail_and_continuation(task_file):
+    t2 = parse_task_list(task_file)[1]
+    assert t2.should_fail is True
+    # continuation row (empty ID) appends nadh[c] to the same task's outputs
+    assert t2.outputs == [("atp[c]", 1.0, 1000.0), ("nadh[c]", 0.0, 1000.0)]
+
+
+def test_parse_equation_default_bounds(task_file):
+    t3 = parse_task_list(task_file)[2]
+    # reversible '<=>' -> EQU LB defaults -1000, UB 1000
+    assert t3.equations == [("A[c] <=> B[c]", -1000.0, 1000.0)]
+
+
+def test_parse_missing_id_column(tmp_path):
+    p = tmp_path / "bad.txt"
+    p.write_text("FOO\tBAR\nx\ty\n")
+    with pytest.raises(ValueError, match="ID"):
+        parse_task_list(p)
+
+
+def test_parse_warns_on_data_row_before_first_id(tmp_path):
+    """known_issues.md B3: continuation rows appearing before the first task ID
+    used to be silently dropped. Now warns so the user sees the malformed file."""
+    p = tmp_path / "orphan.txt"
+    p.write_text(
+        "ID\tDESCRIPTION\tIN\tIN UB\tOUT\tOUT UB\tSHOULD FAIL\n"
+        "\t\tglc[e]\t10\t\t\t\n"        # orphan data row, no ID seen yet
+        "T1\tgrowth\t\t\tbio[c]\t1\t\n"
+    )
+    with pytest.warns(UserWarning, match="no task ID has been seen yet"):
+        tasks = parse_task_list(p)
+    assert [t.id for t in tasks] == ["T1"]
+    # The orphan row's data isn't grafted onto T1 either.
+    assert tasks[0].inputs == []
+
+
+def test_parse_task_list_xlsx_missing_tasks_sheet(tmp_path):
+    """A .xlsx without a 'TASKS' sheet used to raise a bare KeyError; now
+    raises a clear ValueError naming the actual sheets (known_issues.md C3)."""
+    pytest.importorskip("openpyxl")
+    from openpyxl import Workbook
+
+    wb = Workbook()
+    wb.active.title = "NotTasks"
+    p = tmp_path / "wrong.xlsx"
+    wb.save(p)
+    with pytest.raises(ValueError, match="no sheet named 'TASKS'"):
+        parse_task_list(p)
+
+
+# --------------------------------------------------------------------------- #
+# check_tasks
+# --------------------------------------------------------------------------- #
+def _met(mid, name, comp="c"):
+    return cobra.Metabolite(mid, name=name, compartment=comp)
+
+
+@pytest.fixture
+def model():
+    """Closed model: A -> B (r1); D present but unproduced."""
+    m = cobra.Model("t")
+    A, B, D = _met("A_c", "A"), _met("B_c", "B"), _met("D_c", "D")
+    m.add_metabolites([A, B, D])
+    r1 = cobra.Reaction("r1", lower_bound=0, upper_bound=1000)
+    r1.add_metabolites({A: -1, B: 1})
+    m.add_reactions([r1])
+    return m
+
+
+def _by_id(results):
+    return {r.id: r for r in results}
+
+
+def test_feasible_task_passes(model):
+    # OUT LB=1 requires producing B (LB=0 would pass trivially via zero flux).
+    task = Task("make_B", inputs=[("A[c]", 0, 1000)], outputs=[("B[c]", 1, 1000)])
+    (res,) = check_tasks(model, [task])
+    assert res.feasible and res.passed
+
+
+def test_should_fail_task_passes_when_infeasible(model):
+    # Require producing B with no input -> infeasible -> should_fail makes it pass.
+    task = Task("no_input", outputs=[("B[c]", 1, 1000)], should_fail=True)
+    (res,) = check_tasks(model, [task])
+    assert not res.feasible and res.passed
+
+
+def test_unsatisfiable_task_fails(model):
+    task = Task("need_B", outputs=[("B[c]", 1, 1000)])  # no input, not should_fail
+    (res,) = check_tasks(model, [task])
+    assert not res.feasible and not res.passed
+
+
+def test_equation_adds_pathway(model):
+    # Model can't make D; the task's extra reaction B -> D enables output of D.
+    task = Task(
+        "make_D",
+        inputs=[("A[c]", 0, 1000)],
+        outputs=[("D[c]", 1, 1000)],
+        equations=[("B[c] => D[c]", 0.0, 1000.0)],
+    )
+    (res,) = check_tasks(model, [task])
+    assert res.passed
+    # without the extra reaction D cannot be made
+    (res2,) = check_tasks(model, [Task("make_D2", inputs=[("A[c]", 0, 1000)], outputs=[("D[c]", 1, 1000)])])
+    assert not res2.passed
+
+
+def test_changed_bounds_block_reaction(model):
+    # Blocking r1 makes B unproducible.
+    task = Task(
+        "block_r1",
+        inputs=[("A[c]", 0, 1000)],
+        outputs=[("B[c]", 1, 1000)],
+        changed=[("r1", 0.0, 0.0)],
+    )
+    (res,) = check_tasks(model, [task])
+    assert not res.passed
+
+
+def test_allmets_output(model):
+    # Force uptake of A (IN LB=1); the only fate is A->B, so B must be excreted.
+    # ALLMETS output permits that, making the task feasible; without it B accumulates.
+    task = Task("sink_all", inputs=[("A[c]", 1, 1000)], outputs=[("ALLMETS", 0, 1000)])
+    (res,) = check_tasks(model, [task])
+    assert res.passed
+    (res2,) = check_tasks(model, [Task("forced_no_out", inputs=[("A[c]", 1, 1000)])])
+    assert not res2.passed  # forced A uptake but nowhere for B to go
+
+
+def test_unknown_metabolite_reported(model):
+    task = Task("typo", inputs=[("Z[c]", 0, 1000)], outputs=[("B[c]", 0, 1000)])
+    (res,) = check_tasks(model, [task])
+    assert not res.passed and "unknown metabolite" in res.error
+
+
+def test_open_exchange_is_closed_so_task_controls_io(model):
+    # An open sink on B would let B leave for free.  check_tasks must close it, so a
+    # task that forces uptake of A (IN LB=1) and declares no output is infeasible:
+    # A -> B is the only fate of A, and B has nowhere to go once the sink is closed.
+    model.add_boundary(model.metabolites.B_c, type="sink")  # open B sink
+    task = Task("forced_A_no_out", inputs=[("A[c]", 1, 1000)])
+    (res,) = check_tasks(model, [task])
+    assert not res.passed  # would pass if the pre-existing B sink were left open
+
+
+def test_check_tasks_accepts_a_file_path(model, tmp_path):
+    p = tmp_path / "t.txt"
+    p.write_text(
+        "ID\tDESCRIPTION\tIN\tOUT\tOUT LB\n"
+        "make_B\tconvert\tA[c]\tB[c]\t1\n"
+    )
+    results = check_tasks(model, p)  # path, parsed internally
+    assert _by_id(results)["make_B"].passed
diff --git a/tests/test_tasks_essential.py b/tests/test_tasks_essential.py
new file mode 100644
index 0000000..5352378
--- /dev/null
+++ b/tests/test_tasks_essential.py
@@ -0,0 +1,114 @@
+"""Phase 4d.1: essential-reaction discovery for tasks (find_task_essential_reactions).
+
+Oracle: RAVEN tinitTests T0002 — for testModel + the "make e[s] from a[s]" task, the
+pre-merge essential reactions are R2 (the only a[s]<->a[c] link) and R7 (the only
+e[c]->e[s] producer); the alternative internal paths make nothing else essential.
+"""
+import cobra
+from tinit_oracles import (
+    TEST_MODEL_TASK_ESSENTIAL_PREMERGE,
+    make_test_model,
+    make_test_task,
+)
+
+from raven_python.tasks import (
+    EssentialReactionsResult,
+    Task,
+    find_task_essential_reactions,
+)
+
+
+def test_essential_reactions_match_oracle():
+    res = find_task_essential_reactions(make_test_model(), [make_test_task()])
+    assert isinstance(res, EssentialReactionsResult)
+    assert sorted(res.reactions) == TEST_MODEL_TASK_ESSENTIAL_PREMERGE  # ['R2', 'R7']
+    assert not res.failed_tasks  # the oracle task must not be reported as failed
+
+
+def test_essential_directions_are_forward():
+    """R2 (a[s]->a[c]) and R7 (e[c]->e[s]) both carry positive flux for this task."""
+    res = find_task_essential_reactions(make_test_model(), [make_test_task()])
+    assert res.reactions == {"R2": 1, "R7": 1}  # reaction id -> direction; 1 = forward
+
+
+def test_task_metabolites_collected():
+    """a[s] and e[s] are referenced by the task and must be protected from removal."""
+    res = find_task_essential_reactions(make_test_model(), [make_test_task()])
+    m = make_test_model()  # second, independent instance used only for id -> name lookup
+    names = {res_id: f"{m.metabolites.get_by_id(res_id).name}"
+             f"[{m.metabolites.get_by_id(res_id).compartment}]" for res_id in res.task_metabolites}
+    assert set(names.values()) == {"a[s]", "e[s]"}  # compared in name[compartment] form
+
+
+def test_no_task_no_essentials():
+    res = find_task_essential_reactions(make_test_model(), [])  # empty task list
+    assert res.reactions == {} and res.per_task == {}  # both mappings stay empty
+
+
+def test_equation_metabolites_are_protected():
+    """A task equation's metabolites count as task metabolites (protected from removal)."""
+    m = make_test_model()
+    task = Task(
+        id="equ",
+        inputs=[("a[s]", 0.0, 1000.0)],
+        outputs=[("e[c]", 1.0, 1.0)],
+        equations=[("a[c] => e[c]", 0.0, 1000.0)],  # references a[c], which is not an I/O met
+    )
+    res = find_task_essential_reactions(m, [task])
+    names = {f"{m.metabolites.get_by_id(i).name}[{m.metabolites.get_by_id(i).compartment}]"
+             for i in res.task_metabolites}
+    assert {"a[c]", "e[c]"} <= names and "equ" not in res.failed_tasks  # subset check only
+
+
+def test_infeasible_task_is_reported_failed():
+    """A task requiring an impossible output is dropped, not crashed."""
+    impossible = Task(id="bad", outputs=[("z[s]", 1.0, 1.0)])
+    # z[s] doesn't exist -> unknown metabolite -> failed.
+    res = find_task_essential_reactions(make_test_model(), [impossible])
+    assert res.failed_tasks == ["bad"] and res.reactions == {}  # listed; adds no essentials
+
+
+def test_should_fail_task_defines_no_essentials():
+    res = find_task_essential_reactions(
+        make_test_model(), [Task(id="sf", should_fail=True, outputs=[("e[s]", 1.0, 1.0)])]
+    )
+    assert res.reactions == {} and "sf" not in res.per_task  # contributes nothing at all
+
+
+def test_direction_majority_across_tasks():
+    """A reaction essential reverse in two tasks and forward in one is recorded reverse."""
+    # Build a tiny model where a single reaction must run in a chosen direction.
+    m = cobra.Model("dir")
+    a, b = (cobra.Metabolite(x, name=x, compartment="s") for x in "ab")
+    m.add_metabolites([a, b])
+    r = cobra.Reaction("REV", lower_bound=-1000, upper_bound=1000)
+    r.add_metabolites({a: -1, b: 1})  # a <=> b
+    m.add_reactions([r])  # REV is the model's only reaction
+    m.objective = "REV"
+    # Task forcing net production of b from a -> REV forward (+1).
+    fwd = Task(id="fwd", inputs=[("a[s]", 0.0, 1000.0)], outputs=[("b[s]", 1.0, 1.0)])
+    # Two tasks forcing net production of a from b -> REV reverse (-1). Distinct ids
+    # (task lists have unique ids; essential discovery de-duplicates by id).
+    rev1 = Task(id="rev1", inputs=[("b[s]", 0.0, 1000.0)], outputs=[("a[s]", 1.0, 1.0)])
+    rev2 = Task(id="rev2", inputs=[("b[s]", 0.0, 1000.0)], outputs=[("a[s]", 1.0, 1.0)])
+    res = find_task_essential_reactions(m, [rev1, rev2, fwd])
+    assert res.reactions["REV"] == -1  # two reverse votes beat one forward
+
+
+def test_duplicate_name_comp_metabolites_both_constrained():
+    """A task referencing a name[comp] shared by two metabolites resolves (not 'missing')."""
+    m = cobra.Model("dup")
+    # Two distinct metabolites with the SAME name and compartment.
+    a1 = cobra.Metabolite("a1", name="a", compartment="s")
+    a2 = cobra.Metabolite("a2", name="a", compartment="s")
+    b = cobra.Metabolite("b", name="b", compartment="s")
+    m.add_metabolites([a1, a2, b])
+    r1 = cobra.Reaction("R1", lower_bound=0, upper_bound=1000)
+    r1.add_metabolites({a1: -1, b: 1})  # only a1 feeds b
+    m.add_reactions([r1])  # a2 takes part in no reaction
+    m.objective = "R1"
+    # Output b from input a -> 'a[s]' matches both a1 and a2; must not be reported missing.
+    task = Task(id="t", inputs=[("a[s]", 0.0, 1000.0)], outputs=[("b[s]", 1.0, 1.0)])
+    res = find_task_essential_reactions(m, [task])
+    assert res.failed_tasks == []  # 'a[s]' resolved (to both a1 and a2), task feasible
+    assert "R1" in res.reactions
diff --git a/tests/test_utils_balance.py b/tests/test_utils_balance.py
new file mode 100644
index 0000000..aa1e47e
--- /dev/null
+++ b/tests/test_utils_balance.py
@@ -0,0 +1,76 @@
+"""Tests for get_elemental_balance (getElementalBalance port)."""
+import cobra
+import pytest
+
+from raven_python.utils import ElementalBalance, get_elemental_balance
+
+
+@pytest.fixture
+def model():
+    m = cobra.Model("t")
+    m.add_metabolites(
+        [
+            cobra.Metabolite("a_c", formula="C6H12O6", charge=0, compartment="c"),
+            cobra.Metabolite("b_c", formula="C6H12O6", charge=0, compartment="c"),
+            cobra.Metabolite("c_c", formula="C3H6O3", charge=0, compartment="c"),
+            cobra.Metabolite("n_c", compartment="c"),  # no formula
+        ]
+    )
+    r_bal = cobra.Reaction("R_bal")
+    m.add_reactions([r_bal])  # added to m before its equation is parsed
+    r_bal.build_reaction_from_string("a_c --> b_c")        # C6H12O6 -> C6H12O6
+    r_unbal = cobra.Reaction("R_unbal")
+    m.add_reactions([r_unbal])
+    r_unbal.build_reaction_from_string("a_c --> c_c")      # C6H12O6 -> C3H6O3
+    r_unknown = cobra.Reaction("R_unknown")
+    m.add_reactions([r_unknown])
+    r_unknown.build_reaction_from_string("a_c --> n_c")    # n_c has no formula
+    return m  # three reactions, all consuming a_c: R_bal, R_unbal, R_unknown
+
+
+def test_balanced(model):
+    (res,) = get_elemental_balance(model, ["R_bal"])  # one id in -> one result out
+    assert res == ElementalBalance("R_bal", "balanced", {})  # compares every field at once
+
+
+def test_unbalanced_reports_imbalance(model):
+    (res,) = get_elemental_balance(model, ["R_unbal"])
+    assert res.status == "unbalanced"
+    # products - reactants: C3H6O3 - C6H12O6 = -C3H6O3
+    assert res.imbalance == {"C": -3.0, "H": -6.0, "O": -3.0}  # element -> signed difference
+
+
+def test_missing_formula_is_unknown_not_silently_wrong(model):
+    # cobra's check_mass_balance alone would silently report an imbalance here;
+    # we flag it as unknown instead.
+    (res,) = get_elemental_balance(model, ["R_unknown"])
+    assert res.status == "unknown"
+    assert res.imbalance == {}  # no element counts are reported for an unknown verdict
+
+
+def test_all_reactions_default(model):
+    results = get_elemental_balance(model)  # no ids given -> every reaction is checked
+    assert {r.reaction_id: r.status for r in results} == {
+        "R_bal": "balanced",
+        "R_unbal": "unbalanced",
+        "R_unknown": "unknown",
+    }
+
+
+def test_charge_excluded(model):
+    # give a charge imbalance but keep elements balanced -> still "balanced"
+    model.metabolites.get_by_id("b_c").charge = 1  # a_c keeps charge 0
+    (res,) = get_elemental_balance(model, ["R_bal"])
+    assert res.status == "balanced"  # charge does not enter the verdict
+
+
+# --- regression: empty reaction → unknown (known_issues.md F5) -------------
+
+def test_empty_reaction_is_unknown(model):
+    """A reaction with no metabolites used to be reported `balanced`
+    vacuously (any() over an empty list is False and check_mass_balance
+    returns no imbalance). Now reports `unknown`."""
+    empty = cobra.Reaction("R_empty", lower_bound=0, upper_bound=1000)
+    model.add_reactions([empty])  # no metabolites are ever attached to it
+    (res,) = get_elemental_balance(model, ["R_empty"])
+    assert res.status == "unknown"
diff --git a/tests/test_utils_gpr.py b/tests/test_utils_gpr.py
new file mode 100644
index 0000000..275d020
--- /dev/null
+++ b/tests/test_utils_gpr.py
@@ -0,0 +1,84 @@
+"""Tests for raven_python.utils.gpr (GPR linting)."""
+import cobra
+import pytest
+
+from raven_python.utils import GPRIssue, find_non_dnf_grrules, is_dnf
+
+
+@pytest.mark.parametrize(
+    "rule",
+    [
+        "",  # no rule at all
+        "G1",
+        "G1 and G2",
+        "G1 or G2",
+        "G1 and G2 and G3",
+        "G1 or G2 or G3",
+        "(G1 and G2) or G3",  # OR of AND-groups
+        "(G1 and G2) or (G3 and G4)",
+        "G1 or (G2 and G3)",
+    ],
+)
+def test_is_dnf_true(rule):
+    assert is_dnf(rule) is True  # identity check: a real bool, not merely truthy
+
+
+@pytest.mark.parametrize(
+    "rule",
+    [
+        "(G1 or G2) and G3",  # an OR nested under an AND
+        "G1 and (G2 or G3)",
+        "(G1 or G2) and (G3 or G4)",
+        "G1 and (G2 or (G3 and G4))",  # the offending OR sits one level deeper
+    ],
+)
+def test_is_dnf_false(rule):
+    assert is_dnf(rule) is False  # identity check: a real bool, not merely falsy
+
+
+def test_is_dnf_accepts_gpr_and_none():
+    from cobra.core.gene import GPR  # local import: only this test uses the class
+
+    assert is_dnf(GPR.from_string("(G1 or G2) and G3")) is False
+    assert is_dnf(GPR.from_string("G1 or G2")) is True
+    assert is_dnf(None) is True  # None is accepted and counts as DNF
+
+
+def test_is_dnf_independent_of_formatting():
+    # cobra normalises on assignment, so casing/whitespace cannot change the verdict.
+    assert is_dnf("(G1 OR G2)   AND   G3") is False  # upper-case operators, extra spaces
+    assert is_dnf("( G1 and G2 )  or  G3") is True  # padding inside the parentheses
+
+
+def _model_with_rules(rules: dict[str, str]) -> cobra.Model:
+    model = cobra.Model("t")  # rules maps reaction id -> gene_reaction_rule string
+    model.add_reactions([cobra.Reaction(rid) for rid in rules])  # bare reactions, ids only
+    for rid, rule in rules.items():
+        model.reactions.get_by_id(rid).gene_reaction_rule = rule  # set once it is in the model
+    return model
+
+
+def test_find_non_dnf_grrules_flags_only_offenders():
+    model = _model_with_rules(
+        {
+            "R_ok_single": "G1",
+            "R_ok_complex": "G1 and G2",
+            "R_ok_dnf": "(G1 and G2) or G3",
+            "R_no_gpr": "",
+            "R_bad_1": "(G1 or G2) and G3",
+            "R_bad_2": "(G1 or G2) and (G3 or G4)",
+        }
+    )
+
+    issues = find_non_dnf_grrules(model)
+
+    assert [i.reaction_id for i in issues] == ["R_bad_1", "R_bad_2"]  # list: order matters
+    assert all(isinstance(i, GPRIssue) for i in issues)
+    assert all("disjunctive normal form" in i.reason for i in issues)
+    # the reported GPR is the cobra-normalised string
+    assert issues[0].gpr == "(G1 or G2) and G3"
+
+
+def test_find_non_dnf_grrules_empty_when_all_clean():
+    model = _model_with_rules({"R1": "G1 or G2", "R2": "(G1 and G2) or G3"})
+    assert find_non_dnf_grrules(model) == []  # an empty list, not None
diff --git a/tests/test_utils_sort.py b/tests/test_utils_sort.py
new file mode 100644
index 0000000..18bca24
--- /dev/null
+++ b/tests/test_utils_sort.py
@@ -0,0 +1,42 @@
+"""Tests for sort_identifiers and write_yaml_model(sort_ids=True)."""
+import cobra
+
+from raven_python.io import read_yaml_model, write_yaml_model
+from raven_python.manipulation import add_reactions_from_equations
+from raven_python.utils import sort_identifiers
+
+
+def _model():
+    m = cobra.Model("t")
+    m.add_metabolites([cobra.Metabolite(x, compartment="c") for x in ("b_c", "a_c")])
+    add_reactions_from_equations(
+        m,
+        [  # metabolites, reactions and genes are all added in reverse alphabetical order
+            {"id": "R2", "equation": "a_c --> b_c", "gene_reaction_rule": "GB"},
+            {"id": "R1", "equation": "b_c --> a_c", "gene_reaction_rule": "GA"},
+        ],
+    )
+    return m
+
+
+def test_sort_identifiers_orders_everything():
+    m = _model()
+    sort_identifiers(m)  # sorts m in place; the return value is not used
+    assert [r.id for r in m.reactions] == ["R1", "R2"]
+    assert [x.id for x in m.metabolites] == ["a_c", "b_c"]
+    assert [g.id for g in m.genes] == ["GA", "GB"]
+    # lookup index still intact after sorting
+    assert m.reactions.get_by_id("R2").id == "R2"
+
+
+def test_write_yaml_sort_ids_does_not_mutate(tmp_path):
+    m = _model()
+    order_before = [r.id for r in m.reactions]  # snapshot of the unsorted order
+    out = tmp_path / "m.yml"
+    write_yaml_model(m, out, sort_ids=True)
+    assert [r.id for r in m.reactions] == order_before  # model untouched
+    # but the file is sorted
+    text = out.read_text()
+    assert text.index("R1") < text.index("R2")  # first occurrence of each id in the raw text
+    reloaded = read_yaml_model(out)
+    assert [r.id for r in reloaded.reactions] == ["R1", "R2"]
diff --git a/tests/test_utils_validate.py b/tests/test_utils_validate.py
new file mode 100644
index 0000000..2d38e6f
--- /dev/null
+++ b/tests/test_utils_validate.py
@@ -0,0 +1,80 @@
+"""Tests for check_model (the surviving checks of checkModelStruct)."""
+import cobra
+import pytest
+
+from raven_python.manipulation import add_reactions_from_equations
+from raven_python.utils import ModelIssue, check_model
+
+
+def _categories(issues, category):  # object ids of the issues filed under ``category``
+    return [i.object_id for i in issues if i.category == category]
+
+
+@pytest.fixture
+def model():
+    m = cobra.Model("t")
+    m.add_metabolites(
+        [
+            cobra.Metabolite("a_c", name="A", compartment="c"),
+            cobra.Metabolite("b_c", name="B", compartment="c"),
+        ]
+    )
+    add_reactions_from_equations(
+        m, [{"id": "R1", "equation": "a_c --> b_c", "gene_reaction_rule": "G1"}]
+    )
+    m.reactions.get_by_id("R1").objective_coefficient = 1  # R1 is the sole objective
+    return m  # two named metabolites, one reaction with a gene, one objective
+
+
+def test_clean_model_has_no_issues(model):
+    assert check_model(model) == []  # the unmodified fixture is the clean baseline
+
+
+def test_orphan_metabolite(model):  # a metabolite that no reaction uses
+    model.add_metabolites([cobra.Metabolite("orphan_c", name="Orphan", compartment="c")])
+    assert "orphan_c" in _categories(check_model(model), "orphan_metabolite")
+
+
+def test_orphan_gene(model):
+    model.genes.append(cobra.core.gene.Gene("G_lonely"))  # appended directly; in no rule
+    assert "G_lonely" in _categories(check_model(model), "orphan_gene")
+
+
+def test_empty_reaction(model):
+    model.add_reactions([cobra.Reaction("R_empty")])  # no metabolites attached
+    assert "R_empty" in _categories(check_model(model), "empty_reaction")
+
+
+def test_empty_metabolite_name(model):
+    model.add_metabolites([cobra.Metabolite("noname_c", compartment="c")])  # name not set
+    # also an orphan, but we check the name category specifically
+    assert "noname_c" in _categories(check_model(model), "empty_metabolite_name")
+
+
+def test_duplicate_name_compartment(model):
+    # second metabolite named "A" in compartment c
+    dup = cobra.Metabolite("a2_c", name="A", compartment="c")
+    model.add_metabolites([dup])
+    model.reactions.get_by_id("R1").add_metabolites({dup: -1})  # keep it used
+    issues = [i for i in check_model(model) if i.category == "duplicate_name_compartment"]
+    assert len(issues) == 1  # a single issue covers the clashing pair
+    assert "a_c" in issues[0].message and "a2_c" in issues[0].message  # and names both ids
+
+
+def test_no_objective(model):
+    model.reactions.get_by_id("R1").objective_coefficient = 0  # R1 was the only objective
+    cats = [i.category for i in check_model(model)]
+    assert "objective" in cats
+
+
+def test_multiple_objectives(model):
+    add_reactions_from_equations(model, [{"id": "R2", "equation": "b_c --> a_c"}])
+    model.reactions.get_by_id("R2").objective_coefficient = 1  # R1 already carries 1
+    obj_issues = [i for i in check_model(model) if i.category == "objective"]
+    assert len(obj_issues) == 1
+    assert "Multiple" in obj_issues[0].message
+
+
+def test_returns_model_issue_instances(model):
+    model.add_reactions([cobra.Reaction("R_empty")])  # intended to provoke an issue
+    assert all(isinstance(i, ModelIssue) for i in check_model(model))  # vacuous if none
diff --git a/tests/tinit_oracles.py b/tests/tinit_oracles.py
new file mode 100644
index 0000000..638956d
--- /dev/null
+++ b/tests/tinit_oracles.py
@@ -0,0 +1,161 @@
+"""Shared (ft)INIT test oracles, ported from RAVEN's ``tinitTests.m``.
+
+These toy models have **defined reaction scores** and **known ftINIT outputs**, so
+they serve as exact correctness oracles for the Phase 4d port (see
+docs/ftinit_review_and_plan.md). Building them here once lets every sub-phase
+(essential-reaction discovery, the MILP, linear merge, staging) check against the
+same RAVEN-verified answers.
+
+Reaction scores are injected through gene expression using :func:`expr_for_rxn_score`
+(RAVEN's ``getExprForRxnScore``): each toy reaction ``Ri`` has at most one gene
+``Gi``, so an expression of ``exp(score_i/5)`` reproduces the desired score exactly
+(no-gene reactions get ``no_gene_score = -2`` regardless).
+"""
+from __future__ import annotations
+
+import math
+
+import cobra
+
+
+def expr_for_rxn_score(scores, threshold: float = 1.0) -> dict:  # scores[i] -> gene G{i+1}
+    """RAVEN ``getExprForRxnScore``: gene expression giving a target single-gene score.
+
+    Inverts ``score = 5·ln(level/threshold)`` → ``level = threshold·exp(score/5)``.
+    Returns ``{Gi: level}`` for i = 1..len(scores) (gene name ``"G{i}"``), mirroring the
+    1-reaction-1-gene layout of the toy models.
+    """
+    return {f"G{i + 1}": threshold * math.exp(s / 5) for i, s in enumerate(scores)}
+
+
+def _build(model_id, mets, reactions, objective):
+    """mets: {id: (name, compartment)}; reactions: {id: (stoich, lb, ub, gpr)}."""
+    m = cobra.Model(model_id)
+    met_objs = {
+        mid: cobra.Metabolite(mid, name=name, compartment=comp)
+        for mid, (name, comp) in mets.items()
+    }
+    m.add_metabolites(list(met_objs.values()))
+    for rid, (stoich, lb, ub, gpr) in reactions.items():
+        r = cobra.Reaction(rid, lower_bound=lb, upper_bound=ub)
+        r.add_metabolites({met_objs[mid]: coeff for mid, coeff in stoich.items()})
+        m.add_reactions([r])  # one at a time, in the mapping's insertion order
+        if gpr:  # an empty string leaves the reaction without a GPR
+            r.gene_reaction_rule = gpr
+    m.objective = objective  # set last, after every reaction exists
+    return m
+
+
+# --------------------------------------------------------------------------- #
+# testModel — RAVEN getTstModel(): 8 mets, 10 rxns. a[s] -> ... -> e[s] export.
+# --------------------------------------------------------------------------- #
+_TEST_METS = {  # id: (name, compartment)
+    "as": ("a", "s"), "ac": ("a", "c"), "bc": ("b", "c"), "cc": ("c", "c"),
+    "dc": ("d", "c"), "ec": ("e", "c"), "es": ("e", "s"), "fc": ("f", "c"),
+}
+_TEST_RXNS = {  # id: (stoichiometry, lower bound, upper bound, GPR)
+    "R1": ({"as": 1}, 0, 1000, ""),                       # -> a[s]   (exchange, no GPR)
+    "R2": ({"as": -1, "ac": 1}, -1000, 1000, ""),         # a[s] <=> a[c]  (transport, no GPR)
+    "R3": ({"ac": -1, "bc": 1, "cc": 1}, -1000, 1000, "G3"),
+    "R4": ({"ac": -1, "dc": 2}, -1000, 1000, "G4"),
+    "R5": ({"bc": -1, "cc": -1, "ec": 1}, 0, 1000, "G5"),
+    "R6": ({"dc": -2, "ec": 1}, 0, 1000, "G6"),
+    "R7": ({"ec": -1, "es": 1}, 0, 1000, "G7"),           # transport, with GPR
+    "R8": ({"es": -1}, 0, 1000, ""),                      # e[s] ->  (exchange, no GPR)
+    "R9": ({"ac": -1, "fc": 1}, -1000, 1000, "G9"),
+    "R10": ({"fc": -1, "ec": 1}, -1000, 1000, "G10"),
+}
+# RAVEN getTstModelRxnScores(), R1..R10.
+TEST_MODEL_SCORES = [-2, -2, -1, 7, 0.5, 0.5, -1, -2, -3, 3.5]
+
+
+def make_test_model() -> cobra.Model:  # builds a new model object on every call
+    return _build("testModel", _TEST_METS, _TEST_RXNS, "R8")  # objective: R8 (e[s] export)
+
+
+# Oracles (RAVEN tinitTests):
+# T0001 ftINIT, no tasks, default '1+1':
+TEST_MODEL_FTINIT_NO_TASKS = ["R1", "R4", "R6", "R8", "R9", "R10"]
+# T0001 with R7,R10 spontaneous:
+TEST_MODEL_FTINIT_SPONT_R7_R10 = ["R1", "R2", "R4", "R6", "R7", "R8"]
+# T0002 with task "gen e[s] from a[s]": essential rxns (pre-merge ids) and output:
+TEST_MODEL_TASK_ESSENTIAL_PREMERGE = ["R2", "R7"]
+TEST_MODEL_TASK_ESSENTIAL_MERGED = ["R1", "R7"]
+TEST_MODEL_FTINIT_WITH_TASK = ["R1", "R2", "R4", "R6", "R7", "R8", "R9", "R10"]
+# T0004 mergeLinear(testModel): merges {R1,R2},{R3,R5},{R4,R6},{R7,R8},{R9,R10}
+TEST_MODEL_GROUP_IDS = [1, 1, 2, 3, 2, 3, 4, 4, 5, 5]  # one entry per reaction, R1..R10
+TEST_MODEL_MERGED_REV = [0, 0, 0, 0, 1]  # one entry per merged group
+TEST_MODEL_MERGED_LB = [0, 0, 0, 0, -1000]  # one entry per merged group
+# groupRxnScores with R1,R2,R8 zeroed (toIgnore): -> per merged group
+TEST_MODEL_GROUPED_SCORES = [0, -0.5, 7.5, -1, 0.5]
+
+
+# The task: generate e[s] from a[s] (RAVEN getTstModelTasks()).
+def make_test_task():
+    """RAVEN getTstModelTasks(): make e[s] from a[s]."""
+    from raven_python.tasks import Task  # imported inside the function, not at module level
+
+    return Task(
+        id="Gen e[s] from a[s]",
+        description="Gen e[s] from a[s]",
+        inputs=[("a[s]", 0.0, math.inf)],   # (token, LBin, UBin)
+        outputs=[("e[s]", 1.0, 1.0)],       # (token, LBout, UBout)
+    )
+
+
+# --------------------------------------------------------------------------- #
+# testModel4 — RAVEN getTstModel4(): partial linear merges + flips.
+# --------------------------------------------------------------------------- #
+_TEST4_METS = {  # id: (name, compartment); every metabolite is in compartment "s"
+    "a": ("a", "s"), "b": ("b", "s"), "d": ("d", "s"), "e": ("e", "s"),
+    "f": ("f", "s"), "g": ("g", "s"), "h": ("h", "s"),
+}
+_TEST4_RXNS = {  # id: (stoichiometry, lower bound, upper bound, GPR)
+    "R1": ({"a": -1}, -1000, 1000, "G1"),                 # a[s] <=>
+    "R2": ({"a": -1, "b": 1}, 0, 1000, "G2"),             # a[s] -> b[s]
+    "R3": ({"a": -1, "b": 1}, -1000, 1000, "G3"),         # a[s] <=> b[s]
+    "R4": ({"b": -1}, 0, 1000, "G4"),                     # b[s] ->
+    "R5": ({"a": -5, "d": 5}, -1000, 1000, "G5"),         # 5 a[s] <=> 5 d[s]
+    "R6": ({"e": -1, "d": 1}, -1000, 1000, "G6"),         # e[s] <=> d[s]
+    "R7": ({"f": -1, "g": -1, "e": 1}, -1000, 1000, "G7"),  # f[s]+g[s] <=> e[s]
+    "R8": ({"b": -1, "f": 1}, -1000, 1000, "G8"),         # b[s] <=> f[s]
+    "R9": ({"h": -1, "g": 1}, -1000, 1000, "G9"),         # h[s] <=> g[s]
+    "R10": ({"h": -1}, 0, 1000, "G10"),                   # h[s] ->
+    "R11": ({"e": -1, "g": 1}, 0, 1000, "G11"),           # e[s] -> g[s]
+}
+TEST_MODEL4_SCORES = [-1, -1, 2, -1, 0.5, -2, 1, 1.3, -0.5, -0.4, 8]
+# T0004 mergeLinear(testModel4): merges {R5,R6},{R7,R8},{R9,R10}; rest unmerged.
+TEST_MODEL4_GROUP_IDS = [0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 0]  # 0 marks an unmerged reaction
+TEST_MODEL4_MERGED_REV = [1, 0, 1, 0, 1, 1, 0, 0]
+TEST_MODEL4_REVERSED_RXNS = ["R6", "R9"]  # flipped direction when made irreversible
+
+
+def make_test_model4() -> cobra.Model:  # builds a new model object on every call
+    return _build("testModel4", _TEST4_METS, _TEST4_RXNS, "R4")  # objective: R4 (b[s] ->)
+
+
+# --------------------------------------------------------------------------- #
+# testModel5 — RAVEN getTstModel5(): testModel + an unmerged parallel path R11-R14.
+# --------------------------------------------------------------------------- #
+def make_test_model5() -> cobra.Model:
+    m = make_test_model()  # start from testModel (R1..R10), then extend it
+    m.id = "testModel5"
+    m.add_metabolites([cobra.Metabolite("gc", name="g", compartment="c")])
+    gc = m.metabolites.get_by_id("gc")
+    ac = m.metabolites.get_by_id("ac")
+    ec = m.metabolites.get_by_id("ec")
+    extra = {  # keyed by Metabolite objects here, not by id strings as in _build
+        "R11": ({ac: -1, gc: 1}, -1000, 1000, "G11"),
+        "R12": ({ac: -1, gc: 1}, -1000, 1000, "G12"),
+        "R13": ({gc: -1, ec: 1}, -1000, 1000, "G13"),
+        "R14": ({gc: -1, ec: 1}, -1000, 1000, "G14"),
+    }
+    for rid, (stoich, lb, ub, gpr) in extra.items():
+        r = cobra.Reaction(rid, lower_bound=lb, upper_bound=ub)
+        r.add_metabolites(stoich)
+        m.add_reactions([r])
+        r.gene_reaction_rule = gpr  # every extra reaction has a gene, so no emptiness check
+    return m
+
+
+TEST_MODEL5_SCORES = [*TEST_MODEL_SCORES, -1, -1.5, -1, -1.5]  # R1..R10, then R11..R14