|
1 | 1 | require 'tempfile' |
2 | | -require 'csv' |
3 | 2 | require 'aws-sdk-s3' |
4 | 3 | require 'rubyXL' |
5 | 4 | require 'rubyXL/convenience_methods/workbook' |
|
8 | 7 | class UrnListImporterJob < ApplicationJob |
9 | 8 | class AlreadyImported < StandardError; end |
10 | 9 |
|
11 | | - class InvalidFormat < StandardError; end |
12 | | - |
13 | | - REQUIRED_COLUMNS = ['URN', 'CustomerName', 'PostCode', 'Sector'].freeze |
14 | | - |
15 | 10 | discard_on ActiveJob::DeserializationError |
16 | 11 | discard_on AlreadyImported |
17 | 12 |
|
18 | | - discard_on InvalidFormat do |job, _error| |
19 | | - job.arguments.first.update!(aasm_state: :failed) |
20 | | - end |
21 | | - |
22 | 13 | retry_on Aws::S3::Errors::ServiceError |
23 | 14 |
|
| 15 | + # rubocop:disable Metrics/AbcSize |
24 | 16 | def perform(urn_list) |
25 | 17 | raise AlreadyImported unless urn_list.pending? |
26 | 18 |
|
27 | 19 | downloader = AttachedFileDownloader.new(urn_list.excel_file) |
28 | 20 | downloader.download! |
29 | 21 |
|
30 | | - convert_to_csv(downloader.temp_file.path) |
31 | | - |
32 | | - customers = customers_from_csv |
33 | | - |
34 | | - soft_delete!(customers) |
35 | | - upsert!(customers) |
36 | | - |
37 | | - remove_published_column(urn_list, downloader.temp_file.path) |
38 | | - |
39 | | - urn_list.update!(aasm_state: :processed) |
40 | | - |
41 | | - downloader.temp_file.close |
42 | | - downloader.temp_file.unlink |
| 22 | + rows = UrnLists::ReadExcel.new(file_path: downloader.temp_file.path).call |
| 23 | + count = UrnLists::ImportCustomers.new(rows: rows).call |
| 24 | + |
| 25 | + workbook_temp_file = build_workbook_temp_file(urn_list) |
| 26 | + remove_published_column(urn_list, workbook_temp_file.path) |
| 27 | + |
| 28 | + urn_list.update!( |
| 29 | + aasm_state: :processed, |
| 30 | + completed_at: Time.current, |
| 31 | + processed_count: count |
| 32 | + ) |
| 33 | + rescue Aws::S3::Errors::ServiceError |
| 34 | + raise |
| 35 | + rescue UrnLists::ReadExcel::InvalidFormat |
| 36 | + mark_failed!(urn_list) |
| 37 | + raise |
| 38 | + rescue StandardError => e |
| 39 | + mark_failed!(urn_list) if urn_list.persisted? && urn_list.pending? |
| 40 | + raise e |
| 41 | + ensure |
| 42 | + cleanup_downloader_temp_file(downloader&.temp_file) |
| 43 | + cleanup_downloader_temp_file(workbook_temp_file) |
43 | 44 | end |
| 45 | + # rubocop:enable Metrics/AbcSize |
44 | 46 |
|
45 | 47 | private |
46 | 48 |
|
47 | | - def convert_to_csv(path) |
48 | | - command = "in2csv --sheet=\"Customers\" --locale=en_GB --blanks --skipinitialspace #{path}" |
49 | | - command += " | csvcut -c 'URN,CustomerName,PostCode,Sector,Published'" |
50 | | - command += " > \"#{csv_temp_file.path}\"" |
51 | | - |
52 | | - result = Ingest::CommandRunner.new(command).run! |
53 | | - raise InvalidFormat if result.stderr.any? { |s| s.include?('Error') } |
| 49 | + def build_workbook_temp_file(urn_list) |
| 50 | + file = Tempfile.new(['urn_list_workbook', '.xlsx']) |
| 51 | + file.binmode |
| 52 | + file.write(urn_list.excel_file.download) |
| 53 | + file.flush |
| 54 | + file.rewind |
| 55 | + file |
54 | 56 | end |
55 | 57 |
|
56 | | - def csv_temp_file |
57 | | - @csv_temp_file ||= Tempfile.new('customer') |
58 | | - end |
59 | | - |
60 | | - def customers_from_csv |
61 | | - customers = [] |
62 | | - |
63 | | - CSV.foreach(csv_temp_file, headers: true) do |row| |
64 | | - raise InvalidFormat unless (row.headers & REQUIRED_COLUMNS) == REQUIRED_COLUMNS |
| 58 | + def cleanup_downloader_temp_file(file) |
| 59 | + return unless file |
65 | 60 |
|
66 | | - customers << Customer.new( |
67 | | - name: row['CustomerName'], |
68 | | - urn: row['URN'].to_i, |
69 | | - postcode: row['PostCode'], |
70 | | - sector: (row['Sector'] == 'Central Government' ? :central_government : :wider_public_sector), |
71 | | - deleted: false, |
72 | | - published: (row['Published'] == 'False' ? false : true) |
73 | | - ) |
74 | | - end |
75 | | - |
76 | | - csv_temp_file.close |
77 | | - csv_temp_file.unlink |
78 | | - |
79 | | - customers |
80 | | - end |
81 | | - |
82 | | - def upsert!(customers) |
83 | | - Customer.transaction do |
84 | | - Customer.import( |
85 | | - customers, |
86 | | - batch_size: 100, |
87 | | - on_duplicate_key_update: { |
88 | | - conflict_target: [:urn], |
89 | | - columns: %i[name postcode sector deleted published] |
90 | | - } |
91 | | - ) |
92 | | - end |
93 | | - end |
94 | | - |
95 | | - def soft_delete!(customers) |
96 | | - existing_urns = Customer.pluck(:urn) |
97 | | - importing_urns = customers.map(&:urn) |
98 | | - |
99 | | - urns_to_be_deleted = existing_urns - importing_urns |
100 | | - |
101 | | - Customer.where(urn: urns_to_be_deleted).update(deleted: true) |
| 61 | + file.close unless file.closed? |
| 62 | + file.unlink |
102 | 63 | end |
103 | 64 |
|
104 | 65 | def remove_published_column(urn_list, path) |
@@ -134,4 +95,12 @@ def delete_non_publish_row(worksheet, row_num, row) |
134 | 95 | worksheet.delete_row(row_num) |
135 | 96 | true |
136 | 97 | end |
| 98 | + |
| 99 | + def mark_failed!(urn_list, processed_count: 0) |
| 100 | + urn_list.update!( |
| 101 | + aasm_state: :failed, |
| 102 | + completed_at: Time.current, |
| 103 | + processed_count: processed_count |
| 104 | + ) |
| 105 | + end |
137 | 106 | end |
0 commit comments