Skip to content
Open
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 131 additions & 0 deletions ansible/roles/migrate-kubo-c1/tasks/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
---
# Script to update the ceramic-one blockstore and run migration on the updated blocks


# Get latest common snapshot between the kubo and c1 datastores
# Determine the newest snapshot that exists on BOTH nodes. Its name (in the
# kubo form `ipfspool/data-store@<label>`) is registered as
# `common_snapshot.stdout` and is used as the base of the incremental send.
- name: Get latest common snapshot between gitcoin-go-ipfs-1 and gitcoin-rust-ceramic-1
  block:
    - name: List snapshots from gitcoin-go-ipfs-1
      # Read-only query with no shell syntax: `command` is sufficient, and the
      # task never changes the host. `-s creation` makes "last line == newest"
      # explicit instead of relying on the default ordering.
      ansible.builtin.command:
        cmd: zfs list -H -t snapshot -o name -s creation ipfspool/data-store
      register: kubo_snapshots
      changed_when: false
      delegate_to: gitcoin-go-ipfs-1

    - name: List snapshots from gitcoin-rust-ceramic-1
      ansible.builtin.command:
        cmd: zfs list -H -t snapshot -o name -s creation migrationpool/data-store
      register: c1_snapshots
      changed_when: false
      delegate_to: gitcoin-rust-ceramic-1

    - name: Find latest common snapshot
      ansible.builtin.shell:
        cmd: |
          set -o pipefail
          kubo_snaps="{{ kubo_snapshots.stdout_lines | join('\n') }}"
          c1_snaps="{{ c1_snapshots.stdout_lines | join('\n') }}"
          # Rewrite the c1 names into the kubo pool so whole lines can be compared.
          c1_as_kubo="$(printf '%s\n' "$c1_snaps" | sed 's|^migrationpool/|ipfspool/|')"
          # -x: match complete names only, so one label cannot match as a
          # substring of a longer one.
          printf '%s\n' "$kubo_snaps" | grep -Fx -- "$c1_as_kubo" | tail -n 1
      args:
        # `pipefail` is a bash feature.
        executable: /bin/bash
      register: common_snapshot
      changed_when: false
      # grep exits 1 when the two nodes share no snapshot; an empty result is
      # treated as a failure as well.
      failed_when: common_snapshot.rc != 0 or common_snapshot.stdout == ""
      delegate_to: localhost
Comment on lines +34 to +42

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this look for the latest common migrated snapshot? We don't just want to find the latest common snapshot, we want to find the one we know for sure was migrated last. WDYT?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, I don't think so. I think we just want to bring over everything since the latest common snapshot.

If we leave holes, we can fix them manually; we want this whole process done in the next day or so.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the main thing is to keep the files and snapshots, and to label each filename with its date range so that all the data is available for reruns.


# Print the registered `common_snapshot.stdout` value so the snapshot chosen as
# the incremental base is visible in the play output.
- name: Display latest common snapshot
ansible.builtin.debug:
var: common_snapshot.stdout

# NOTE(review): nesting is not visible in this view; presumably this keyword
# belongs to the enclosing "Get latest common snapshot" block - confirm.
run_once: true

# Take a fresh, timestamp-labelled snapshot of the kubo datastore.
# `zfs snapshot` prints nothing on success, so the name is built first and
# echoed explicitly: later tasks read it from `new_snapshot.stdout`.
- name: Create new snapshot on ipfs node
  ansible.builtin.shell:
    cmd: |
      set -eu
      snap="ipfspool/data-store@$(date +%Y%m%d_%H%M%S)"
      zfs snapshot "$snap"
      echo "$snap"
  register: new_snapshot
  delegate_to: gitcoin-go-ipfs-1

# Stream the delta between the common base snapshot and the new snapshot from
# the kubo node into the c1 node's pool over ssh.
- name: Send incremental snapshot to c1 node
  ansible.builtin.shell:
    cmd: |
      # Without pipefail the task status would be that of `ssh` alone and a
      # failing `zfs send` would go unnoticed.
      set -o pipefail
      zfs send -i "{{ common_snapshot.stdout }}" "{{ new_snapshot.stdout }}" | \
        ssh gitcoin-rust-ceramic-1 'zfs receive migrationpool/data-store'
  args:
    # `pipefail` is a bash feature.
    executable: /bin/bash
  delegate_to: gitcoin-go-ipfs-1

# The timestamp is resolved once, here, by the templating engine. Embedding a
# literal `$(date ...)` in the facts would be re-expanded by every shell task
# (yielding a different file name each time) and would reach `command` tasks
# unexpanded.
- name: Capture a single timestamp for this run's output files
  ansible.builtin.set_fact:
    migration_run_stamp: "{{ now().strftime('%Y%m%d_%H%M%S') }}"

# Paths (on the c1 node) shared by the diff, filter and migration tasks below.
- name: Set output file name for diff results
  ansible.builtin.set_fact:
    diff_output_file: "/tmp/zfs_diff_output_{{ migration_run_stamp }}.txt"
    modified_block_file: "/tmp/zfs_modified_blockfiles_{{ migration_run_stamp }}.txt"
    files_to_process: "/tmp/zfs_modified_files_to_process_{{ migration_run_stamp }}.txt"
    # Deliberately not timestamped: this ledger accumulates across runs.
    already_processed: "/tmp/zfs_already_processed.txt"


# Compare the base and new snapshots as they exist on the c1 node (the kubo
# snapshot names are rewritten into the migration pool) and keep the raw diff
# in `diff_output_file` for the filtering tasks that follow.
- name: Run zfs diff on the c1 node
  block:
    - name: Execute zfs diff
      ansible.builtin.shell:
        cmd: |
          # Without pipefail the exit status would be that of `tee`, hiding a
          # failed `zfs diff`.
          set -o pipefail
          zfs diff -F \
            "{{ common_snapshot.stdout | regex_replace('ipfspool', 'migrationpool') }}" \
            "{{ new_snapshot.stdout | regex_replace('ipfspool', 'migrationpool') }}" \
            | tee "{{ diff_output_file }}"
      args:
        executable: /bin/bash
      register: diff_result
      delegate_to: gitcoin-rust-ceramic-1
      become: true

    # The registered output is only available once the command has finished,
    # so this prints the complete diff after the fact.
    - name: Display diff results in real time
      ansible.builtin.debug:
        var: diff_result.stdout_lines
      when: diff_result.stdout_lines | length > 0

  # `strategy` is a play-level keyword and is rejected on a block, so it is
  # not set here; configure it on the play if it is needed.

# Run the migration script on the c1 node, restricted to the blocks that changed.

# Reduce the saved diff to the paths under the kubo blocks directory and write
# them, one per line, to `modified_block_file`.
# NOTE(review): with `zfs diff -F` the second column appears to be the
# file-type marker, so this awk pattern keeps only `M` rows whose type column
# is `/` - confirm that this is the intended set of entries.
- name: Generate list of modified block files
  delegate_to: gitcoin-rust-ceramic-1
  become: true
  ansible.builtin.shell:
    cmd: |
      awk '/^M\t\/\t/ {print $3}' "{{ diff_output_file }}" | grep '/go_ipfs_datastore/ipfs-data/blocks/' > "{{ modified_block_file }}"
  args:
    executable: /bin/bash

# Write to `files_to_process` every path in `modified_block_file` that is not
# yet recorded in the `already_processed` ledger.
- name: Exclude files already processed
  ansible.builtin.shell:
    cmd: |
      set -e
      # The ledger does not exist before the first run; create it so `sort`
      # does not error out inside the process substitution.
      touch "{{ already_processed }}"
      # comm -23 keeps the lines that appear only in the first (sorted) input.
      comm -23 <(sort -u "{{ modified_block_file }}") <(sort -u "{{ already_processed }}") > "{{ files_to_process }}"
  args:
    # Process substitution `<( )` requires bash.
    executable: /bin/bash
  delegate_to: gitcoin-rust-ceramic-1
  become: true


# Migrate the not-yet-processed block files into the ceramic-one store and,
# only if that succeeds, record them in the `already_processed` ledger.
- name: Run migration and update processed files list
  block:
    # TODO: correct how we run this script.
    - name: Run migration on modified files not already processed
      ansible.builtin.command:
        # Folded scalar: the lines below are joined into one command line.
        # Optional flags default to "off" when their variable is not defined.
        cmd: >
          ceramic-one migrations from-ipfs
          --input-ipfs-path {{ input_ipfs_path }}
          --output-store-path {{ output_store_path }}
          --input-file-list-path {{ files_to_process }}
          --network {{ network }}
          {% if local_network_id is defined %}--local-network-id {{ local_network_id }}{% endif %}
          {% if non_sharded_paths | default(false) | bool %}--non-sharded-paths{% endif %}
          {% if log_tile_docs | default(false) | bool %}--log-tile-docs{% endif %}
      environment:
        CERAMIC_ONE_INPUT_FILE_LIST_PATH: "{{ files_to_process }}"
      delegate_to: gitcoin-rust-ceramic-1
      become: true

    # Skipped automatically when the migration task above fails, so a failed
    # batch is retried on the next run.
    - name: Append processed files to already_processed list
      ansible.builtin.shell:
        cmd: cat "{{ files_to_process }}" >> "{{ already_processed }}"
      delegate_to: gitcoin-rust-ceramic-1
      become: true

  rescue:
    - name: Display migration failure message
      ansible.builtin.debug:
        msg: "Migration process failed. Check logs for errors."

    # A rescue section that completes cleanly clears the failure; fail
    # explicitly so the play does not report a failed migration as a success.
    - name: Fail the play after reporting the migration error
      ansible.builtin.fail:
        msg: "Migration failed in task '{{ ansible_failed_task.name }}'."

  always:
    - name: Display migration completion message
      ansible.builtin.debug:
        msg: "Migration process completed. Check logs for details."