Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .env.development
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
ATS_API_CLIENT_TESTING_API_KEY=a94272cb45956f2116f3026847ebb640dc60a2fe
DATABASE_URL=postgis://postgres:postgres@localhost
DFE_SIGN_IN_REDIRECT_URL=http://localhost:3000/auth/dfe/callback
DISABLE_EMAILS=false
DOMAIN=localhost:3000
Expand Down
4 changes: 2 additions & 2 deletions app/jobs/import_organisation_data_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ class ImportOrganisationDataJob < ApplicationJob
def perform
SchoolGroupMembership.mark_all_records_for_deletion

Gias::ImportSchoolsAndLocalAuthorities.new.call
Gias::ImportSchoolsAndLocalAuthorities.call
Gias::ImportTrusts.new.call

# NOTE: Gias::ImportSchoolsAndLocalAuthorities.new.call updates most SchoolGroupMemberships so they are no longer marked for deletion. Only
# NOTE: Gias::ImportSchoolsAndLocalAuthorities.call updates most SchoolGroupMemberships so they are no longer marked for deletion. Only
# the SchoolGroupMemberships that don't exist in the new import will still be marked for deletion.
SchoolGroupMembership.delete_records_marked_for_deletion
end
Expand Down
2 changes: 2 additions & 0 deletions app/models/organisation.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ class Organisation < ApplicationRecord
include PgSearch::Model
extend FriendlyId

include Discard::Model

has_rich_text :description

SPECIAL_SCHOOL_TYPES = ["Community special school", "Foundation special school", "Non-maintained special school", "Academy special converter", "Academy special sponsor led", "Free schools special"].freeze
Expand Down
15 changes: 11 additions & 4 deletions app/models/school.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,24 @@ class School < Organisation
has_many :school_group_memberships, dependent: :destroy
has_many :school_groups, through: :school_group_memberships

scope :not_excluded, -> { where.not(detailed_school_type: EXCLUDED_DETAILED_SCHOOL_TYPES) }
scope :not_excluded, -> { kept.where.not(detailed_school_type: EXCLUDED_DETAILED_SCHOOL_TYPES) }

validates :urn, uniqueness: true

ACADEMY_TYPE = "Academies".freeze
LA_SCHOOL_TYPE = "Local authority maintained schools".freeze
FREE_SCHOOL_TYPE = "Free Schools".freeze
INDEPENDENT_SCHOOL_TYPE = "Independent schools".freeze
VALID_SCHOOL_TYPES = [LA_SCHOOL_TYPE, INDEPENDENT_SCHOOL_TYPE, "Special schools", "Universities", ACADEMY_TYPE, FREE_SCHOOL_TYPE, "Welsh schools", "Other types", "Colleges", "Online provider"].freeze
COLLEGE_SCHOOL_TYPE = "Colleges".freeze
FE_DETAILED_SCHOOL_TYPE = "Further education".freeze
VALID_SCHOOL_TYPES = [LA_SCHOOL_TYPE, INDEPENDENT_SCHOOL_TYPE, "Special schools", "Universities", ACADEMY_TYPE, FREE_SCHOOL_TYPE, "Welsh schools", "Other types", COLLEGE_SCHOOL_TYPE, "Online provider"].freeze
# School type groups whose schools are treated as out of scope.
# Entries must use the same spelling as VALID_SCHOOL_TYPES ("Online provider", singular);
# a value missing from that list could never match a school that passes validation.
EXCLUDED_SCHOOL_TYPES = ["Universities", "Welsh schools", "Online provider"].freeze

# This is direct from GIAS (with plurals removed via singularize)
# This is direct from GIAS
validates :school_type, inclusion: { in: VALID_SCHOOL_TYPES }

EXCLUDED_DETAILED_SCHOOL_TYPES = [
"Further education",
FE_DETAILED_SCHOOL_TYPE,
"Other independent school",
"Online provider",
"British schools overseas",
Expand Down Expand Up @@ -51,6 +54,10 @@ class School < Organisation
through: %i[early_years ks1 ks2 ks3 ks4 ks5],
}.freeze

# True when this school has been discarded (is not kept) or when its detailed
# school type is one of EXCLUDED_DETAILED_SCHOOL_TYPES.
def excluded?
  return true unless kept?

  EXCLUDED_DETAILED_SCHOOL_TYPES.include?(detailed_school_type)
end

def religious_character
return if !respond_to?(:gias_data) || gias_data.nil?
return if ["None", "Does not apply"].include?(gias_data["ReligiousCharacter (name)"])
Expand Down
4 changes: 4 additions & 0 deletions app/models/school_group.rb
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,8 @@ def all_organisation_ids
# Always answers "non_faith" for a school group.
# NOTE(review): presumably selects which ATS interstitial variant is shown — confirm against callers.
def ats_interstitial_variant
"non_faith"
end

# Always false for a school group.
# NOTE(review): presumably defined so callers can send `excluded?` to schools and
# school groups alike — confirm against callers.
def excluded?
false
end
end
2 changes: 1 addition & 1 deletion app/models/school_group_membership.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def self.delete_records_marked_for_deletion
to_delete = marked_for_deletion.map { |m| m.school_group.name }
.group_by { |x| x }
.transform_values(&:length)
Sentry.capture_message("Memberships to delete, by SchoolGroup: #{to_delete}", level: :info)
Sentry.capture_message("Memberships to delete, by SchoolGroup: #{to_delete}", level: :info) if marked_for_deletion.any?

if marked_for_deletion.count > MAX_RECORDS_TO_BULK_DELETE
# If this error is raised investigate the school_groups associated with the marked for deletion school group memberships. if a school group, local authority,
Expand Down
241 changes: 129 additions & 112 deletions app/services/gias/import_schools_and_local_authorities.rb
Original file line number Diff line number Diff line change
@@ -1,141 +1,158 @@
require "log_benchmark"
require "csv"

class Gias::ImportSchoolsAndLocalAuthorities
SCHOOLS_AND_LOCAL_AUTHORITIES_CSV = "edubasealldata".freeze
BATCH_SIZE = 100

include LogBenchmark

def initialize
reset_data
end
extend LogBenchmark

class ImportFailure < StandardError
end

def call
log_benchmark("Importing schools and local authorities") do
import_errors = Gias::Data.new(SCHOOLS_AND_LOCAL_AUTHORITIES_CSV).each_slice(BATCH_SIZE).flat_map do |group|
group.each do |row|
local_authorities.add(group_data(row))
schools.push(school_data(row))
memberships.push(membership_data(row))
class << self
def call
# This file is a list of colleges in-scope for FE vacancies on TVS
uk_colleges = CSV.read(Rails.root.join("config/data/colleges.csv"), headers: true).index_by { |r| r.fetch("UKPRN").to_i }.transform_values(&:to_h)

log_benchmark("Importing schools and local authorities") do
import_errors = Gias::Data.new(SCHOOLS_AND_LOCAL_AUTHORITIES_CSV).each_slice(BATCH_SIZE).flat_map do |group|
import_group uk_colleges, group
end
import_batch
raise ImportFailure, import_errors.map(&:errors) if import_errors.any?
end
raise(ImportFailure, import_errors.map { |x| x.errors.full_messages }) if import_errors.any?
end
end

private
private

# Imports one batch of GIAS rows and then discards the schools in that batch
# which are out of scope.
#
# @param uk_colleges [Hash] in-scope colleges keyed by integer UKPRN
# @param group [Enumerable] one batch of GIAS CSV rows
# @return [Array] the failed instances reported by import_batch for this batch
def import_group(uk_colleges, group)
  local_authorities = Set.new # LAs are provided with every school so we can discard duplicates
  schools = []
  memberships = []

  group.each do |row|
    local_authorities.add(group_data(row))
    schools.push(school_data(row))
    memberships.push(membership_data(row))
  end

  import_batch(local_authorities, schools, memberships).tap do
    # Discard only after the batch has been written: the school records must exist
    # for find_by!, and the school import itself writes discarded_at: nil for every row.
    schools.each do |school_row|
      next unless discard_school?(uk_colleges, school_row)

      School.find_by!(urn: school_row.fetch(:urn)).discard
    end
  end
end

# Whether an imported school row should be discarded: a further-education college
# whose UKPRN is not in uk_colleges, a school of an excluded type, or a closed establishment.
#
# The operators trail each line on purpose: a line that *starts* with `||` is a
# syntax error on Ruby versions before 4.0.
def discard_school?(uk_colleges, school_row)
  out_of_scope_college =
    school_row.fetch(:school_type) == School::COLLEGE_SCHOOL_TYPE &&
    school_row.fetch(:detailed_school_type) == School::FE_DETAILED_SCHOOL_TYPE &&
    !uk_colleges.key?(school_row.fetch(:uk_prn))

  out_of_scope_college ||
    School::EXCLUDED_SCHOOL_TYPES.include?(school_row.fetch(:school_type)) ||
    school_row.fetch(:establishment_status) == "Closed"
end

attr_reader :local_authorities, :schools, :memberships
# Runs the local authority, school and membership imports (in that order) for one
# batch and returns every failed instance they reported, as a single array.
def import_batch(local_authorities, schools, memberships)
  results = [
    import_local_authorities(local_authorities),
    import_schools(schools),
    import_memberships(local_authorities, schools, memberships),
  ]

  results.flat_map(&:failed_instances)
end

def reset_data
@local_authorities = Set.new # LAs are provided with every school so we can discard duplicates
@schools = []
@memberships = []
end
# Bulk-imports the batch's local authorities as SchoolGroups. When a row conflicts on
# local_authority_code, every column present in the row data is updated.
def import_local_authorities(local_authorities)
  rows = local_authorities.to_a
  upsert_options = {
    conflict_target: [:local_authority_code],
    columns: local_authorities.first.keys,
  }

  SchoolGroup.import(rows, on_duplicate_key_update: upsert_options)
end

# sum doesn't work on arrays the same way it works on Integers
# rubocop:disable Performance/Sum
def import_batch
[import_local_authorities,
import_schools,
import_memberships].map(&:failed_instances)
.reduce(:+).tap { reset_data }
end
# rubocop:enable Performance/Sum

def import_local_authorities
SchoolGroup.import(
local_authorities.to_a,
on_duplicate_key_update: {
conflict_target: [:local_authority_code],
columns: local_authorities.first.keys,
},
)
end
# Bulk-imports the batch's schools. The uk_prn and religious_character keys are dropped
# from each row and discarded_at: nil is added before the import. When a row conflicts
# on urn, every remaining column is updated.
def import_schools(schools)
  unpersisted_keys = %i[uk_prn religious_character]
  imported_schools = schools.map do |attributes|
    attributes.reject { |key, _value| unpersisted_keys.include?(key) }.merge(discarded_at: nil)
  end
  upsert_options = { conflict_target: [:urn], columns: imported_schools.first.keys }

  School.import(imported_schools, on_duplicate_key_update: upsert_options)
end

def import_schools
School.import(
schools,
on_duplicate_key_update: {
conflict_target: [:urn],
columns: schools.first.keys,
},
)
end
def import_memberships(local_authorities, schools, memberships)
school_ids = School.where(urn: schools.map { |s| s[:urn] }).pluck(:urn, :id).to_h
group_ids = SchoolGroup.where(
local_authority_code: local_authorities.map { |la| la[:local_authority_code] },
).pluck(:local_authority_code, :id).to_h

# school_group_memberships = memberships.map do |m|
# {
# school_id: school_ids.fetch(m.fetch(:urn)),
# school_group_id: group_ids.fetch(m.fetch(:local_authority_code)),
# do_not_delete: true,
# }
# end
school_group_memberships = memberships.map do |m|
{
school_id: school_ids[m[:urn]],
school_group_id: group_ids[m[:local_authority_code]],
do_not_delete: true,
}
end

def import_memberships
school_ids = School.where(urn: schools.map { |s| s[:urn] }).pluck(:urn, :id).to_h
group_ids = SchoolGroup.where(
local_authority_code: local_authorities.map { |la| la[:local_authority_code] },
).pluck(:local_authority_code, :id).to_h
SchoolGroupMembership.import(
school_group_memberships,
on_duplicate_key_update: {
conflict_target: %i[school_id school_group_id],
columns: school_group_memberships.first.keys,
},
)
end

school_group_memberships = memberships.map do |m|
def group_data(row)
{
school_id: school_ids[m[:urn]],
school_group_id: group_ids[m[:local_authority_code]],
do_not_delete: true,
local_authority_code: row["LA (code)"],
name: row["LA (name)"],
group_type: "local_authority",
gias_data: row.to_h.slice("LA (code)", "LA (name)"),
}
end

SchoolGroupMembership.import(
school_group_memberships,
on_duplicate_key_update: {
conflict_target: %i[school_id school_group_id],
columns: school_group_memberships.first.keys,
},
)
end

def group_data(row)
{
local_authority_code: row["LA (code)"],
name: row["LA (name)"],
group_type: "local_authority",
gias_data: row.to_h.slice("LA (code)", "LA (name)"),
}
end

def school_data(row) # rubocop:disable Metrics/MethodLength
{
urn: row["URN"],
address: row["Street"],
address3: row["Address3"],
county: row["County (name)"],
detailed_school_type: row["TypeOfEstablishment (name)"],
establishment_status: row["EstablishmentStatus (name)"],
local_authority_within: row["LA (name)"],
locality: row["Locality"],
maximum_age: row["StatutoryHighAge"],
minimum_age: row["StatutoryLowAge"],
name: row["EstablishmentName"],
postcode: row["Postcode"],
region: row["GOR (name)"],
school_type: row["EstablishmentTypeGroup (name)"],
town: row["Town"],
phase: row["PhaseOfEducation (code)"].to_i,
url: Addressable::URI.heuristic_parse(row["SchoolWebsite"]).to_s,
gias_data: row.to_h,
}.merge(school_location_data(row)).transform_values(&:presence)
end
# Builds the attribute hash for one school from a GIAS CSV row.
# Location attributes from school_location_data are merged in, and every value is
# passed through `presence`, so blank values end up as nil.
def school_data(row) # rubocop:disable Metrics/MethodLength
{
urn: row["URN"],
address: row["Street"],
address3: row["Address3"],
county: row["County (name)"],
detailed_school_type: row["TypeOfEstablishment (name)"],
establishment_status: row["EstablishmentStatus (name)"],
local_authority_within: row["LA (name)"],
locality: row["Locality"],
maximum_age: row["StatutoryHighAge"],
minimum_age: row["StatutoryLowAge"],
name: row["EstablishmentName"],
postcode: row["Postcode"],
region: row["GOR (name)"],
school_type: row["EstablishmentTypeGroup (name)"],
town: row["Town"],
# to_i turns a missing or non-numeric phase code into 0
phase: row["PhaseOfEducation (code)"].to_i,
url: Addressable::URI.heuristic_parse(row["SchoolWebsite"]).to_s,
# Integer, matching the integer UKPRN keys of the colleges lookup used by import_group.
# import_schools removes uk_prn (and religious_character below) before the School import.
uk_prn: row["UKPRN"].to_i,
# The complete source row is kept alongside the mapped attributes
gias_data: row.to_h,
religious_character: row["ReligiousCharacter (name)"],
}.merge(school_location_data(row)).transform_values(&:presence)
end

def school_location_data(row)
return {} unless row["Easting"] && row["Northing"]
def school_location_data(row)
return {} unless row["Easting"] && row["Northing"]

uk27700 = GeoFactories::FACTORY_27700.point(row["Easting"].to_i, row["Northing"].to_i)
{
uk_geopoint: uk27700,
geopoint: GeoFactories.convert_sr27700_to_wgs84(uk27700),
}
end
uk27700 = GeoFactories::FACTORY_27700.point(row["Easting"].to_i, row["Northing"].to_i)
{
uk_geopoint: uk27700,
geopoint: GeoFactories.convert_sr27700_to_wgs84(uk27700),
}
end

def membership_data(row)
{
urn: row["URN"],
local_authority_code: row["LA (code)"],
}
# Extracts the pair of identifiers linking a school to its local authority:
# the school's URN and the local authority code from the same row.
def membership_data(row)
  urn = row["URN"]
  local_authority_code = row["LA (code)"]

  { urn: urn, local_authority_code: local_authority_code }
end
end
end
2 changes: 2 additions & 0 deletions app/services/gias/import_trusts.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@

class Gias::ImportTrusts
TRUSTS_CSV = "allgroupsdata".freeze
# alllinksdata only seems to have type 5 and 6 links in it.
TRUST_MEMBERSHIPS_CSV = "alllinksdata".freeze
# TRUST_MEMBERSHIPS_CSV = "links_edubasealldata".freeze
MAT_GROUP_TYPE = 6

include LogBenchmark
Expand Down
4 changes: 1 addition & 3 deletions app/services/vacancies/import/shared.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@ module Vacancies::Import
module Shared
LEGACY_WORKING_PATTERNS = %w[flexible term_time job_share].freeze
def vacancy_listed_at_excluded_school_type?(schools)
return false if schools.none?

(schools.map(&:detailed_school_type) & School::EXCLUDED_DETAILED_SCHOOL_TYPES).present?
schools.any?(&:excluded?)
end

# Our system only imports MAT type trusts from GIAS DB.
Expand Down
1 change: 1 addition & 0 deletions config/analytics.yml
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@ shared:
- group_type
- local_authority_within
- establishment_status
- discarded_at
organisation_publisher_preferences:
- id
- organisation_id
Expand Down
Loading
Loading