Browse Source

Add tooling to import address databases

feature/region-slug-gone
Bèr Kessels 5 months ago
parent
commit
e4722c2044
4 changed files with 65 additions and 1 deletions
  1. 1
    0
      Makefile
  2. 34
    0
      bin/openaddr2copy
  3. 28
    0
      bin/seedaddr
  4. 2
    1
      test/test_helper.rb

+ 1
- 0
Makefile View File

@@ -40,6 +40,7 @@ _db-setup:
$(CMD_PREFIX) rake db:event_store
$(CMD_PREFIX) rake db:projections
$(CMD_PREFIX) rake db:seed # TODO: don't run seeds on test
# Load the address fixture only in the test environment. APP_ENV is quoted so an
# empty value cannot break `[`, and it is passed explicitly so bin/seedaddr does
# not fall back to its "development" default and truncate the wrong database.
@if [ "$(APP_ENV)" = 'test' ]; then APP_ENV=test ./bin/seedaddr < test/fixtures/address_sample.csv; fi

_wait:
sleep 5

+ 34
- 0
bin/openaddr2copy View File

@@ -0,0 +1,34 @@
#!/bin/bash
#
# Convert an address CSV read from STDIN into CSV on STDOUT whose columns match
# the query_addresses table described below, ready for a postgres COPY.
# Requires the `csvtk` command line tool.

# Exit on error. Append "|| true" if you expect an error.
set -o errexit
# Let ERR traps be inherited by functions and subshells.
set -o errtrace
# Do not allow use of undefined vars. Use ${VAR:-} to use an undefined VAR
set -o nounset
# Fail the pipeline when any command in it fails, not only the last one.
set -o pipefail
# Turn on traces, useful while debugging but commented out by default
# set -o xtrace

# Assuming a table like this:
# CREATE TABLE query_addresses (
# hash char(16) PRIMARY KEY,
# number varchar(32),
# street varchar(255),
# unit varchar(255),
# city varchar(255),
# district varchar(255),
# region varchar(255),
# postcode varchar(32),
# location GEOMETRY(POINT, 4326)
# );
# CREATE INDEX query_address_location ON query_addresses USING GIST(location);
# It can be inserted into postgres using:
# ./bin/openaddr2copy < openaddr/nl/countrywide.csv | psql $DB_URL -c "COPY query_addresses FROM STDIN DELIMITER AS ',' CSV HEADER"

# Pipeline steps:
# 1. rename the first 11 input columns to lowercase names
#    (assumes the input has them in this order: lon, lat, number, street, unit,
#    city, district, region, postcode, id, hash -- TODO confirm against the source files)
# 2. drop rows whose hash was already seen, since hash is the primary key
# 3. add a "location" column holding "SRID=4326;POINT(<lon> <lat>)"
# 4. keep only the table's columns, in the table's column order
csvtk rename -f1-11 -n'lon,lat,number,street,unit,city,district,region,postcode,id,hash' - \
| csvtk uniq -f hash \
| csvtk mutate2 -n location -e '"SRID=4326;POINT(" + $lon + " " + $lat + ")"' \
| csvtk cut -f hash,number,street,unit,city,district,region,postcode,location

+ 28
- 0
bin/seedaddr View File

@@ -0,0 +1,28 @@
#!/bin/bash
#
# Replace the contents of the address lookup table with CSV read from STDIN.
#
# Usage (run from the directory that holds .env):
#   APP_ENV=test ./bin/seedaddr < test/fixtures/address_sample.csv
#
# The CSV must start with a header row (it is skipped by COPY ... CSV HEADER)
# and list its columns in the table's column order.
# Reads DB_USER, DB_HOST, DB_PORT and DB_NAME from .env / .env.<APP_ENV>.

# Exit on error. Append "|| true" if you expect an error.
set -o errexit
# Let ERR traps be inherited by functions and subshells.
set -o errtrace
# Do not allow use of undefined vars. Use ${VAR:-} to use an undefined VAR
set -o nounset
# Fail a pipeline when any command in it fails, not only the last one.
set -o pipefail
# Turn on traces, useful while debugging but commented out by default
#set -o xtrace

# Load the base settings first, then the optional per-environment overrides.
source .env
: "${APP_ENV:=development}"
ENV_SRC_FILE=".env.${APP_ENV}"
if [ -f "${ENV_SRC_FILE}" ]; then
  source "${ENV_SRC_FILE}"
fi
DATABASE_TABLE="query_addresses"

# NOTE(review): the password slot previously reused DB_USER. DB_PASSWORD is now
# honoured when it is set and the old value is kept as fallback -- confirm the
# variable name used by the project's .env files.
DB_URL="postgres://${DB_USER}:${DB_PASSWORD:-${DB_USER}}@${DB_HOST}:${DB_PORT}/${DB_NAME}"

# Run one SQL command ($1) against the configured database.
# psql is invoked directly (not through `echo $(...)`) so that its exit status
# reaches errexit and a failed statement aborts the script. STDIN is inherited,
# which is what feeds COPY ... FROM STDIN.
function psql_exec {
  psql "${DB_URL}" -v ON_ERROR_STOP=1 -c "${1}"
}

psql_exec "TRUNCATE ${DATABASE_TABLE}"
psql_exec "COPY ${DATABASE_TABLE} FROM STDIN DELIMITER AS ',' CSV HEADER"

+ 2
- 1
test/test_helper.rb View File

@@ -35,7 +35,8 @@ module Minitest
config.logger = Logger.new(nil)
end

DatabaseCleaner.strategy = :truncation, { except: %w[spatial_ref_sys] }
DatabaseCleaner.strategy = :truncation,
{ except: %w[spatial_ref_sys query_addresses] }

before :each do
DatabaseCleaner.start

Loading…
Cancel
Save