Skip to content

Commit ca5eb11

Browse files
authored
Merge pull request #1269 from Crown-Commercial-Service/feature/nrmi-59-urn-nightly-job
Feature/nrmi 59 urn nightly job
2 parents 5a8cd96 + e8fc0af commit ca5eb11

16 files changed

Lines changed: 866 additions & 217 deletions

app/controllers/admin/urn_lists_controller.rb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,15 @@ class Admin::UrnListsController < AdminController
22
before_action :find_latest_list, only: %i[index download]
33

44
def index
5-
@urn_lists = UrnList.order(created_at: :desc).all
5+
@urn_lists = UrnList.order(created_at: :desc).page(params[:page])
66
end
77

88
def new
99
@urn_list = UrnList.new
1010
end
1111

1212
def create
13-
@urn_list = UrnList.new(urn_list_params)
13+
@urn_list = UrnList.new(urn_list_params.merge(source: 'manual_upload'))
1414

1515
if @urn_list.save
1616
UrnListImporterJob.perform_later(@urn_list)
@@ -33,7 +33,7 @@ def urn_list_params
3333
end
3434

3535
def find_latest_list
36-
@latest_urn_list = UrnList.processed.order(created_at: :desc).first
36+
@latest_urn_list = UrnList.where(source: 'manual_upload', aasm_state: 'processed').order(created_at: :desc).first
3737
end
3838

3939
def s3_client

app/jobs/urn_list_api_sync_job.rb

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
class UrnListApiSyncJob < ApplicationJob
2+
def perform
3+
urn_list = UrnList.create!(aasm_state: :pending, source: 'api_import')
4+
5+
rows = UrnLists::ApiClient.new.fetch_rows
6+
count = UrnLists::ImportCustomers.new(rows: rows).call
7+
8+
urn_list.update!(
9+
aasm_state: :processed,
10+
completed_at: Time.current,
11+
processed_count: count
12+
)
13+
rescue StandardError => e
14+
urn_list.update!(
15+
aasm_state: :failed,
16+
completed_at: Time.current,
17+
processed_count: count || 0
18+
)
19+
raise e
20+
end
21+
end

app/jobs/urn_list_importer_job.rb

Lines changed: 43 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
require 'tempfile'
2-
require 'csv'
32
require 'aws-sdk-s3'
43
require 'rubyXL'
54
require 'rubyXL/convenience_methods/workbook'
@@ -8,97 +7,59 @@
87
class UrnListImporterJob < ApplicationJob
98
class AlreadyImported < StandardError; end
109

11-
class InvalidFormat < StandardError; end
12-
13-
REQUIRED_COLUMNS = ['URN', 'CustomerName', 'PostCode', 'Sector'].freeze
14-
1510
discard_on ActiveJob::DeserializationError
1611
discard_on AlreadyImported
1712

18-
discard_on InvalidFormat do |job, _error|
19-
job.arguments.first.update!(aasm_state: :failed)
20-
end
21-
2213
retry_on Aws::S3::Errors::ServiceError
2314

15+
# rubocop:disable Metrics/AbcSize
2416
def perform(urn_list)
2517
raise AlreadyImported unless urn_list.pending?
2618

2719
downloader = AttachedFileDownloader.new(urn_list.excel_file)
2820
downloader.download!
2921

30-
convert_to_csv(downloader.temp_file.path)
31-
32-
customers = customers_from_csv
33-
34-
soft_delete!(customers)
35-
upsert!(customers)
36-
37-
remove_published_column(urn_list, downloader.temp_file.path)
38-
39-
urn_list.update!(aasm_state: :processed)
40-
41-
downloader.temp_file.close
42-
downloader.temp_file.unlink
22+
rows = UrnLists::ReadExcel.new(file_path: downloader.temp_file.path).call
23+
count = UrnLists::ImportCustomers.new(rows: rows).call
24+
25+
workbook_temp_file = build_workbook_temp_file(urn_list)
26+
remove_published_column(urn_list, workbook_temp_file.path)
27+
28+
urn_list.update!(
29+
aasm_state: :processed,
30+
completed_at: Time.current,
31+
processed_count: count
32+
)
33+
rescue Aws::S3::Errors::ServiceError
34+
raise
35+
rescue UrnLists::ReadExcel::InvalidFormat
36+
mark_failed!(urn_list)
37+
raise
38+
rescue StandardError => e
39+
mark_failed!(urn_list) if urn_list.persisted? && urn_list.pending?
40+
raise e
41+
ensure
42+
cleanup_downloader_temp_file(downloader&.temp_file)
43+
cleanup_downloader_temp_file(workbook_temp_file)
4344
end
45+
# rubocop:enable Metrics/AbcSize
4446

4547
private
4648

47-
def convert_to_csv(path)
48-
command = "in2csv --sheet=\"Customers\" --locale=en_GB --blanks --skipinitialspace #{path}"
49-
command += " | csvcut -c 'URN,CustomerName,PostCode,Sector,Published'"
50-
command += " > \"#{csv_temp_file.path}\""
51-
52-
result = Ingest::CommandRunner.new(command).run!
53-
raise InvalidFormat if result.stderr.any? { |s| s.include?('Error') }
49+
def build_workbook_temp_file(urn_list)
50+
file = Tempfile.new(['urn_list_workbook', '.xlsx'])
51+
file.binmode
52+
file.write(urn_list.excel_file.download)
53+
file.flush
54+
file.rewind
55+
file
5456
end
5557

56-
def csv_temp_file
57-
@csv_temp_file ||= Tempfile.new('customer')
58-
end
59-
60-
def customers_from_csv
61-
customers = []
62-
63-
CSV.foreach(csv_temp_file, headers: true) do |row|
64-
raise InvalidFormat unless (row.headers & REQUIRED_COLUMNS) == REQUIRED_COLUMNS
58+
def cleanup_downloader_temp_file(file)
59+
return unless file
6560

66-
customers << Customer.new(
67-
name: row['CustomerName'],
68-
urn: row['URN'].to_i,
69-
postcode: row['PostCode'],
70-
sector: (row['Sector'] == 'Central Government' ? :central_government : :wider_public_sector),
71-
deleted: false,
72-
published: (row['Published'] == 'False' ? false : true)
73-
)
74-
end
75-
76-
csv_temp_file.close
77-
csv_temp_file.unlink
78-
79-
customers
80-
end
81-
82-
def upsert!(customers)
83-
Customer.transaction do
84-
Customer.import(
85-
customers,
86-
batch_size: 100,
87-
on_duplicate_key_update: {
88-
conflict_target: [:urn],
89-
columns: %i[name postcode sector deleted published]
90-
}
91-
)
92-
end
93-
end
94-
95-
def soft_delete!(customers)
96-
existing_urns = Customer.pluck(:urn)
97-
importing_urns = customers.map(&:urn)
98-
99-
urns_to_be_deleted = existing_urns - importing_urns
100-
101-
Customer.where(urn: urns_to_be_deleted).update(deleted: true)
61+
file.close unless file.closed?
62+
file.unlink
10263
end
10364

10465
def remove_published_column(urn_list, path)
@@ -134,4 +95,12 @@ def delete_non_publish_row(worksheet, row_num, row)
13495
worksheet.delete_row(row_num)
13596
true
13697
end
98+
99+
def mark_failed!(urn_list, processed_count: 0)
100+
urn_list.update!(
101+
aasm_state: :failed,
102+
completed_at: Time.current,
103+
processed_count: processed_count
104+
)
105+
end
137106
end

app/models/urn_list.rb

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
class UrnList < ApplicationRecord
22
include AASM
33

4+
enum :source, {
5+
manual_upload: 'manual_upload',
6+
api_import: 'api_import'
7+
}
8+
49
aasm do
510
state :pending, initial: true
611
state :processed
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
require 'net/http'
2+
require 'uri'
3+
require 'json'
4+
5+
module UrnLists
6+
class ApiClient
7+
class ApiError < StandardError; end
8+
9+
def fetch_rows
10+
token = fetch_access_token
11+
fetch_urn_list(token)
12+
end
13+
14+
private
15+
16+
def fetch_access_token
17+
uri = URI.parse(ENV.fetch('MDM_API_TOKEN_URL'))
18+
19+
response = Net::HTTP.post_form(uri, {
20+
grant_type: 'client_credentials',
21+
client_id: ENV.fetch('MDM_API_CLIENT_ID'),
22+
client_secret: ENV.fetch('MDM_API_CLIENT_SECRET'),
23+
scope: ENV.fetch('MDM_API_SCOPE')
24+
})
25+
26+
raise ApiError, "Failed to fetch access token: #{response.code}" unless response.is_a?(Net::HTTPSuccess)
27+
28+
body = JSON.parse(response.body)
29+
body.fetch('access_token')
30+
end
31+
32+
def fetch_urn_list(token)
33+
base_url = 'https://apim.crowncommercial.gov.uk/website-data/manual/paths/invoke/%5Batt%5D.%5Bvw_RMIActiveURNList%5D/'
34+
params = {
35+
'api-version' => '2016-10-01',
36+
'sp' => '/triggers/manual/run',
37+
'sv' => '1.0',
38+
'filter' => "Published eq 'True'"
39+
}
40+
41+
uri = URI(base_url)
42+
uri.query = URI.encode_www_form(params)
43+
44+
request = Net::HTTP::Get.new(uri.to_s)
45+
request['Authorization'] = "Bearer #{token}"
46+
request['Accept'] = 'application/json'
47+
48+
response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == 'https') do |http|
49+
http.request(request)
50+
end
51+
52+
raise ApiError, "Failed to fetch URN list: #{response.code}" unless response.is_a?(Net::HTTPSuccess)
53+
54+
rows = JSON.parse(response.body)
55+
validate_rows!(rows)
56+
rows
57+
end
58+
59+
def validate_rows!(rows)
60+
return if rows.is_a?(Array) && rows.all? { |row| row.is_a?(Hash) }
61+
62+
raise ApiError, 'Invalid URN list format: expected an array of objects'
63+
end
64+
end
65+
end
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
module UrnLists
2+
class ImportCustomers
3+
def initialize(rows:)
4+
@rows = rows
5+
end
6+
7+
def call
8+
customers = build_customers
9+
soft_delete!(customers)
10+
upsert!(customers)
11+
12+
customers.count
13+
end
14+
15+
private
16+
17+
attr_reader :rows
18+
19+
def build_customers
20+
rows.map do |row|
21+
next row if row.is_a?(Customer)
22+
23+
Customer.new(
24+
name: row['CustomerName'],
25+
urn: row['URN'].to_i,
26+
postcode: row['PostCode'],
27+
sector: normalize_sector(row['Sector']),
28+
deleted: false,
29+
published: normalize_published(row['Published'])
30+
)
31+
end
32+
end
33+
34+
def normalize_sector(value)
35+
value == 'Central Government' ? :central_government : :wider_public_sector
36+
end
37+
38+
def normalize_published(value)
39+
return true if value.nil?
40+
41+
value != 'False'
42+
end
43+
44+
def upsert!(customers)
45+
Customer.transaction do
46+
Customer.import(
47+
customers,
48+
batch_size: 100,
49+
on_duplicate_key_update: {
50+
conflict_target: [:urn],
51+
columns: %i[name postcode sector deleted published]
52+
}
53+
)
54+
end
55+
end
56+
57+
def soft_delete!(customers)
58+
existing_urns = Customer.pluck(:urn)
59+
importing_urns = customers.map(&:urn)
60+
61+
urns_to_be_deleted = existing_urns - importing_urns
62+
63+
Customer.where(urn: urns_to_be_deleted).update(deleted: true)
64+
end
65+
end
66+
end

0 commit comments

Comments
 (0)