Commit 8bf419d6 authored by Martin Raifer
Browse files

first version

parents
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Coordinates of the extractor tool itself. -->
  <groupId>org.heigit.ohsome.clustertools</groupId>
  <artifactId>oshdb-extractor</artifactId>
  <version>0.1-SNAPSHOT</version>
  <packaging>jar</packaging>

  <parent>
    <groupId>org.heigit.bigspatialdata</groupId>
    <artifactId>bigspatialdata-parent</artifactId>
    <version>1.2</version>
  </parent>

  <dependencies>
    <!--
      NOTE(review): the tool's Java code also imports org.h2, org.apache.ignite and
      javax.cache classes that are not declared here; presumably they arrive
      transitively through oshdb-api. Confirm before changing this dependency.
    -->
    <dependency>
      <groupId>org.heigit.bigspatialdata</groupId>
      <artifactId>oshdb-api</artifactId>
      <version>0.5.4</version>
      <type>jar</type>
    </dependency>
    <dependency>
      <groupId>it.unimi.dsi</groupId>
      <artifactId>fastutil</artifactId>
      <version>8.1.1</version>
      <scope>compile</scope>
    </dependency>
    <dependency>
      <groupId>org.wololo</groupId>
      <artifactId>jts2geojson</artifactId>
      <version>0.13.0</version>
    </dependency>
    <dependency>
      <groupId>com.beust</groupId>
      <artifactId>jcommander</artifactId>
      <version>1.72</version>
    </dependency>
  </dependencies>

  <repositories>
    <repository>
      <snapshots>
        <enabled>false</enabled>
      </snapshots>
      <id>oshdb-releases</id>
      <name>Heigit/GIScience repository (releases)</name>
      <!-- https instead of http: Maven 3.8.1 and later block plain-http repositories by default. -->
      <url>https://repo.heigit.org/artifactory/libs-release-local</url>
    </repository>
  </repositories>

  <build>
    <plugins>
      <!-- Builds an executable "jar-with-dependencies" during the package phase. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-assembly-plugin</artifactId>
        <executions>
          <execution>
            <phase>package</phase>
            <goals>
              <goal>single</goal>
            </goals>
            <configuration>
              <archive>
                <manifest>
                  <mainClass>org.heigit.ohsome.clustertools.CreateH2Extract</mainClass>
                </manifest>
              </archive>
              <descriptorRefs>
                <descriptorRef>jar-with-dependencies</descriptorRef>
              </descriptorRefs>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>

  <properties>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
  </properties>
</project>
\ No newline at end of file
package org.heigit.ohsome.clustertools;
import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.ParameterException;
import it.unimi.dsi.fastutil.io.FastByteArrayInputStream;
import it.unimi.dsi.fastutil.io.FastByteArrayOutputStream;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.ObjectOutputStream;
import java.io.Reader;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
import javax.cache.Cache;
import org.apache.ignite.IgniteCache;
import org.apache.ignite.cache.query.QueryCursor;
import org.apache.ignite.cache.query.ScanQuery;
import org.h2.tools.RunScript;
import org.heigit.bigspatialdata.oshdb.api.db.OSHDBIgnite;
import org.heigit.bigspatialdata.oshdb.api.db.OSHDBJdbc;
import org.heigit.bigspatialdata.oshdb.grid.GridOSHEntity;
import org.heigit.bigspatialdata.oshdb.grid.GridOSHNodes;
import org.heigit.bigspatialdata.oshdb.grid.GridOSHRelations;
import org.heigit.bigspatialdata.oshdb.grid.GridOSHWays;
import org.heigit.bigspatialdata.oshdb.index.XYGridTree;
import org.heigit.bigspatialdata.oshdb.index.XYGridTree.CellIdRange;
import org.heigit.bigspatialdata.oshdb.osh.OSHEntity;
import org.heigit.bigspatialdata.oshdb.osh.OSHNode;
import org.heigit.bigspatialdata.oshdb.osh.OSHRelation;
import org.heigit.bigspatialdata.oshdb.osh.OSHWay;
import org.heigit.bigspatialdata.oshdb.osm.OSMEntity;
import org.heigit.bigspatialdata.oshdb.osm.OSMMember;
import org.heigit.bigspatialdata.oshdb.osm.OSMRelation;
import org.heigit.bigspatialdata.oshdb.util.CellId;
import org.heigit.bigspatialdata.oshdb.util.OSHDBBoundingBox;
import org.heigit.bigspatialdata.oshdb.util.OSHDBRole;
import org.heigit.bigspatialdata.oshdb.util.OSHDBTag;
import org.heigit.bigspatialdata.oshdb.util.OSHDBTagKey;
import org.heigit.bigspatialdata.oshdb.util.TableNames;
import org.heigit.bigspatialdata.oshdb.util.geometry.OSHDBGeometryBuilder;
import org.heigit.bigspatialdata.oshdb.util.tagtranslator.TagTranslator;
import org.locationtech.jts.geom.Geometry;
import org.wololo.jts2geojson.GeoJSONReader;
import org.wololo.jts2geojson.GeoJSONWriter;
public class CreateH2Extract {
/**
 * Command line entry point: cuts a bounding-box extract out of an OSHDB stored in Ignite and
 * writes it into a new H2 database file.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>parse and validate the command line (see {@code CmdArgsConfig}),</li>
 *   <li>create the empty target tables by running the bundled {@code empty.oshdb.sql},</li>
 *   <li>unless disabled, check that the bbox lies inside the source's {@code extract.region},</li>
 *   <li>scan the node, way and (optionally) relation caches for grid cells covering the bbox,
 *       keep only entities whose bounding box intersects it, and insert the cells,</li>
 *   <li>unless disabled, write the key/tag/role translation tables for everything that was
 *       exported, and copy the metadata (replacing {@code extract.region} with the bbox and
 *       dropping {@code data.*} entries).</li>
 * </ol>
 *
 * <p>Exit codes used here: 0 after printing help, 1 for invalid arguments, 2 if the bbox is not
 * contained in the input region.
 *
 * @param args command line arguments
 * @throws Exception on any database, Ignite or I/O failure
 */
public static void main(String[] args) throws Exception {
  CmdArgsConfig cmdArgs = new CmdArgsConfig();
  JCommander coms = JCommander.newBuilder().addObject(cmdArgs).build();
  try {
    coms.parse(args);
  } catch (ParameterException e) {
    JCommander.getConsole().println("");
    JCommander.getConsole().println(e.getLocalizedMessage());
    JCommander.getConsole().println("");
    coms.usage();
    System.exit(1);
  }
  if (cmdArgs.help) {
    coms.usage();
    System.exit(0);
  }

  // Parse the bbox; reject anything that is not exactly four numbers instead of failing later
  // with an IndexOutOfBoundsException or silently ignoring surplus values.
  List<Double> bboxCoords = null;
  try {
    bboxCoords = Arrays.stream(cmdArgs.bbox.split(","))
        .map(Double::parseDouble).collect(Collectors.toList());
  } catch (NumberFormatException e) {
    // reported below together with a wrong number of coordinates
  }
  if (bboxCoords == null || bboxCoords.size() != 4) {
    System.err.println("error: bbox must be 4 comma separated numbers: min-lon,min-lat,max-lon,max-lat");
    System.exit(1);
  }
  OSHDBBoundingBox bbox = new OSHDBBoundingBox(bboxCoords.get(0), bboxCoords.get(1), bboxCoords.get(2), bboxCoords.get(3));

  // Create the (empty) target schema.
  Class.forName("org.h2.Driver");
  try (
      Connection destination = DriverManager.getConnection("jdbc:h2:" + cmdArgs.outputH2, "sa", "");
      Reader schemaScript = new InputStreamReader(
          Objects.requireNonNull(
              ClassLoader.getSystemClassLoader().getResourceAsStream("empty.oshdb.sql"),
              "resource empty.oshdb.sql not found on classpath"),
          java.nio.charset.StandardCharsets.UTF_8)
  ) {
    RunScript.execute(destination, schemaScript);
    System.out.println("> done preparing tables");
  }

  // A positive limit keeps a trailing empty field (e.g. an empty password) and leaves any
  // further '|' characters inside the last field.
  List<String> keytableSettings = Arrays.asList(cmdArgs.inKeytables.split("\\|", 4));
  if (keytableSettings.size() != 4) {
    System.err.println("error: keytables connection must have the form \"<JDBC class>|<JDBC connection string>|<DB username>|<DB password>\"");
    System.exit(1);
  }

  try (
      OSHDBJdbc keytables = new OSHDBJdbc(
          keytableSettings.get(0),
          keytableSettings.get(1),
          keytableSettings.get(2),
          keytableSettings.get(3));
      OSHDBIgnite source = new OSHDBIgnite(cmdArgs.igniteConfig)
          .prefix(cmdArgs.inPrefix);
      Connection destination = DriverManager.getConnection("jdbc:h2:" + cmdArgs.outputH2, "sa", "");
      PreparedStatement insertNodes = destination.prepareStatement("insert into " + TableNames.T_NODES.toString(cmdArgs.outPrefix) + " (level,id,data) values (?,?,?)");
      PreparedStatement insertWays = destination.prepareStatement("insert into " + TableNames.T_WAYS.toString(cmdArgs.outPrefix) + " (level,id,data) values (?,?,?)");
      PreparedStatement insertRelations = destination.prepareStatement("insert into " + TableNames.T_RELATIONS.toString(cmdArgs.outPrefix) + " (level,id,data) values (?,?,?)");
      PreparedStatement insertKey = destination.prepareStatement("insert into key (id,txt) values (?,?)");
      PreparedStatement insertTag = destination.prepareStatement("insert into keyvalue (keyid,valueid,txt) values (?,?,?)");
      PreparedStatement insertRole = destination.prepareStatement("insert into role (id,txt) values (?,?)");
      PreparedStatement selectMetadata = keytables.getConnection().prepareStatement("select key, value from metadata");
      PreparedStatement insertMetadata = destination.prepareStatement("insert into metadata (key,value) values (?,?)")
  ) {
    if (!cmdArgs.skipExtractRegionCheck) {
      Geometry extractRegion = (new GeoJSONReader()).read(keytables.metadata("extract.region"));
      if (!extractRegion.contains(OSHDBGeometryBuilder.getGeometry(bbox))) {
        System.err.println("error: bbox not fully contained in input region");
        System.exit(2);
      }
    }

    // Index the cell id ranges covering the bbox: zoom level -> (range start id -> range).
    XYGridTree grid = new XYGridTree();
    Map<Integer, TreeMap<Long, CellIdRange>> cellIdRangesByLevel = new HashMap<>();
    for (CellIdRange cellIdRange : grid.bbox2CellIdRanges(bbox, true)) {
      int level = cellIdRange.getStart().getZoomLevel();
      cellIdRangesByLevel
          .computeIfAbsent(level, ignored -> new TreeMap<>())
          .put(cellIdRange.getStart().getId(), cellIdRange);
    }

    // Collected while writing cells; used afterwards to fill the keytables.
    Set<OSHDBTagKey> tagKeys = new HashSet<>();
    Set<OSHDBTag> tags = new HashSet<>();
    Set<OSHDBRole> roles = new HashSet<>();
    TagTranslator tt = new TagTranslator(keytables.getConnection());

    // Accepts a cache entry if its cell id falls into one of the ranges of its zoom level:
    // look up the range with the greatest start <= id and test whether id is inside it.
    ScanQuery<Long, GridOSHEntity> scanQuery = new ScanQuery<>((key, cell) -> {
      CellId cellId = CellId.fromLevelId(key);
      int level = cellId.getZoomLevel();
      long id = cellId.getId();
      if (!cellIdRangesByLevel.containsKey(level)) {
        return false;
      }
      Entry<Long, CellIdRange> cellIdRangeEntry = cellIdRangesByLevel.get(level).floorEntry(id);
      if (cellIdRangeEntry == null) {
        return false;
      }
      CellIdRange cellIdRange = cellIdRangeEntry.getValue();
      return cellIdRange.getStart().getId() <= id && cellIdRange.getEnd().getId() >= id;
    });

    // --- nodes ---
    try (
        IgniteCache<Long, GridOSHEntity> cache = source.getIgnite().cache(TableNames.T_NODES.toString(cmdArgs.inPrefix));
        QueryCursor<GridOSHEntity> cursor = cache.query(scanQuery, cacheEntry -> {
          GridOSHEntity gridEntry = cacheEntry.getValue();
          try {
            return GridOSHNodes.rebase(
                gridEntry.getId(),
                gridEntry.getLevel(),
                0,
                0,
                0,
                0,
                StreamSupport.stream(gridEntry.getEntities().spliterator(), false)
                    .map(x -> (OSHNode) x)
                    .filter(osh -> osh.getBoundingBox().intersects(bbox))
                    .collect(Collectors.toList())
            );
          } catch (IOException e) {
            // NOTE(review): a null result presumably shows up as a null cursor element, which
            // writeEntity does not handle - consider failing fast here instead. TODO confirm.
            e.printStackTrace();
            return null;
          }
        })
    ) {
      for (GridOSHEntity filteredGridEntry : cursor) {
        writeEntity(filteredGridEntry, insertNodes, tagKeys, tags, roles, cmdArgs);
      }
    }
    System.out.println("> nodes done");

    // --- ways ---
    try (
        IgniteCache<Long, GridOSHEntity> cache = source.getIgnite().cache(TableNames.T_WAYS.toString(cmdArgs.inPrefix));
        QueryCursor<GridOSHEntity> cursor = cache.query(scanQuery, cacheEntry -> {
          GridOSHEntity gridEntry = cacheEntry.getValue();
          try {
            return GridOSHWays.compact(
                gridEntry.getId(),
                gridEntry.getLevel(),
                0,
                0,
                0,
                0,
                StreamSupport.stream(gridEntry.getEntities().spliterator(), false)
                    .map(x -> (OSHWay) x)
                    .filter(osh -> osh.getBoundingBox().intersects(bbox))
                    .collect(Collectors.toList())
            );
          } catch (IOException e) {
            e.printStackTrace();
            return null;
          }
        })
    ) {
      for (GridOSHEntity filteredGridEntry : cursor) {
        writeEntity(filteredGridEntry, insertWays, tagKeys, tags, roles, cmdArgs);
      }
    }
    // The original appended a leftover debug value (cmdArgs.skipLargeRelations) to this message.
    System.out.println("> ways done");

    // --- relations ---
    if (!cmdArgs.skipRelations) {
      // Capture only the flag, not the whole cmdArgs object, in the closure handed to Ignite.
      final boolean skipLargeRelations = cmdArgs.skipLargeRelations;
      try (
          IgniteCache<Long, GridOSHEntity> cache = source.getIgnite()
              .cache(TableNames.T_RELATIONS.toString(cmdArgs.inPrefix));
          QueryCursor<GridOSHEntity> cursor = cache.query(scanQuery, cacheEntry -> {
            GridOSHEntity gridEntry = cacheEntry.getValue();
            try {
              return GridOSHRelations.compact(
                  gridEntry.getId(),
                  gridEntry.getLevel(),
                  0,
                  0,
                  0,
                  0,
                  StreamSupport.stream(gridEntry.getEntities().spliterator(), false)
                      .map(x -> (OSHRelation) x)
                      .filter(osh -> osh.getBoundingBox().intersects(bbox))
                      // with skipLargeRelations: keep a relation only if at least one member
                      // (of any version) itself intersects the bbox
                      .filter(oshRel -> !skipLargeRelations ||
                          StreamSupport.stream(oshRel.getVersions().spliterator(), false)
                              .flatMap(osmrel -> Arrays.stream(osmrel.getMembers()))
                              .map(OSMMember::getEntity)
                              .filter(Objects::nonNull)
                              .anyMatch(member -> member.getBoundingBox().intersects(bbox))
                      )
                      .collect(Collectors.toList())
              );
            } catch (IOException e) {
              e.printStackTrace();
              return null;
            }
          })
      ) {
        for (GridOSHEntity filteredGridEntry : cursor) {
          writeEntity(filteredGridEntry, insertRelations, tagKeys, tags, roles, cmdArgs);
        }
      }
      System.out.println("> relations done");
    }

    // --- keytables: translate every collected id back to its text and store it ---
    // Insert failures are reported on stderr and do not abort the extract.
    if (!cmdArgs.skipKeytables) {
      for (OSHDBTagKey tagKey : tagKeys) {
        insertKey.setInt(1, tagKey.toInt());
        insertKey.setString(2, tt.getOSMTagKeyOf(tagKey).toString());
        try {
          insertKey.execute();
        } catch (Exception e) {
          System.err.println(e.toString());
        }
      }
      for (OSHDBTag tag : tags) {
        insertTag.setInt(1, tag.getKey());
        insertTag.setInt(2, tag.getValue());
        insertTag.setString(3, tt.getOSMTagOf(tag).toString());
        try {
          insertTag.execute();
        } catch (Exception e) {
          System.err.println(e.toString());
        }
      }
      for (OSHDBRole role : roles) {
        insertRole.setInt(1, role.toInt());
        insertRole.setString(2, tt.getOSMRoleOf(role).toString());
        try {
          insertRole.execute();
        } catch (Exception e) {
          System.err.println(e.toString());
        }
      }
      System.out.println("> keytables done");
    }

    // --- metadata ---
    if (!cmdArgs.skipMetadata) {
      try (ResultSet metadata = selectMetadata.executeQuery()) {
        while (metadata.next()) {
          String metadataKey = metadata.getString(1);
          if (metadataKey.startsWith("data.")) {
            // ignore raw data statistics
            continue;
          }
          String metadataValue = metadata.getString(2);
          if (metadataKey.equalsIgnoreCase("extract.region")) {
            // the extract only covers the requested bbox
            metadataValue = (new GeoJSONWriter()).write(OSHDBGeometryBuilder.getGeometry(bbox)).toString();
          }
          insertMetadata.setString(1, metadataKey);
          insertMetadata.setString(2, metadataValue);
          insertMetadata.execute();
        }
        insertMetadata.setString(1, "generator");
        insertMetadata.setString(2, "oshdb-extractor-v0.1");
        insertMetadata.execute();
        System.out.println("> metadata done");
      }
    }
    System.out.println("> all done");
  }
}
/**
 * Inserts one grid cell as a row {@code (level, id, data)} using the given statement, where
 * {@code data} is the Java-serialized cell, and records the tag keys, tags and (for relation
 * cells) member roles used by the cell's entities.
 *
 * @param gridCell the grid cell to store
 * @param insert   prepared insert statement with the parameters (level, id, data)
 * @param tagKeys  receives the tag keys of all entities in the cell
 * @param tags     receives the key/value tags of all entity versions in the cell
 * @param roles    receives the member roles of all relation versions in the cell
 * @param cmdArgs  configuration; if {@code skipKeytables} is set, nothing is collected
 * @throws SQLException if the insert fails
 * @throws IOException  if the cell cannot be serialized
 */
private static void writeEntity(GridOSHEntity gridCell, PreparedStatement insert, Set<OSHDBTagKey> tagKeys, Set<OSHDBTag> tags, Set<OSHDBRole> roles, CmdArgsConfig cmdArgs) throws SQLException, IOException {
  // 1. store the cell itself
  insert.setInt(1, gridCell.getLevel());
  insert.setLong(2, gridCell.getId());
  FastByteArrayOutputStream serialized = new FastByteArrayOutputStream(1024);
  try (ObjectOutputStream objectStream = new ObjectOutputStream(serialized)) {
    objectStream.writeObject(gridCell);
    objectStream.flush();
  }
  insert.setBinaryStream(3, new FastByteArrayInputStream(serialized.array, 0, serialized.length));
  insert.execute();

  // 2. remember which keys, tags and roles occur (needed for the keytables only)
  if (cmdArgs.skipKeytables) {
    return;
  }
  final boolean collectRoles = gridCell instanceof GridOSHRelations;
  for (OSHEntity osh : gridCell.getEntities()) {
    for (int rawKey : osh.getRawTagKeys()) {
      tagKeys.add(new OSHDBTagKey(rawKey));
    }
    for (OSMEntity osm : osh.getVersions()) {
      // raw tags are read as consecutive (key, value) pairs
      int[] keyValuePairs = osm.getRawTags();
      int pos = 0;
      while (pos < keyValuePairs.length) {
        tags.add(new OSHDBTag(keyValuePairs[pos], keyValuePairs[pos + 1]));
        pos += 2;
      }
      if (!collectRoles) {
        continue;
      }
      for (OSMMember member : ((OSMRelation) osm).getMembers()) {
        roles.add(new OSHDBRole(member.getRawRoleId()));
      }
    }
  }
}
/**
 * Command line options of the extractor, populated by JCommander.
 *
 * <p>Fields are public and non-final because JCommander assigns them during parsing.
 */
private static class CmdArgsConfig {
  /** Ignite configuration file of the source cluster. */
  @Parameter(names = {"-i", "--ignite", "--ignite-config"}, description = "Path to ignite-config.xml", required = true, order = 1)
  public File igniteConfig;

  /** Prefix of the source caches; stays {@code null} when not given. */
  @Parameter(names = {"--input-prefix"}, description = "input cache prefix", required = false)
  public String inPrefix;

  /** Pipe separated JDBC settings of the database holding the source keytables and metadata. */
  @Parameter(names = {"-k", "--input-keytables"}, description = "input keytables connection, pipe separated: \"<JDBC class>|<JDBC connection string>|<DB username>|<DB password>\"", required = true, order = 2)
  public String inKeytables;

  /** Bounding box of the extract as a comma separated string. */
  @Parameter(names = {"-b", "--bbox"}, description = "bounding box to cut out; list of 4 coordinate endpoints: min-lon, min-lat, max-lon, max-lat", required = true, order = 3)
  public String bbox;

  /** Target H2 database file. */
  @Parameter(names = {"-o", "--output"}, description = "Path to output H2 file", required = true, order = 4)
  public File outputH2;

  // Not exposed as a command line option (annotation disabled), so this is always null.
  // NOTE(review): it is still passed to TableNames.toString(...) by main - confirm that a
  // null prefix yields the unprefixed table names created by empty.oshdb.sql.
  //@Parameter(names = {"--output-prefix"}, description = "output table prefix", required = false)
  public String outPrefix;

  /** Do not write the key, keyvalue and role tables. */
  @Parameter(names = {"--skip-keytables"}, description = "skip keytables", required = false)
  public boolean skipKeytables = false;

  /** Do not copy the metadata table. */
  @Parameter(names = {"--skip-metadata"}, description = "skip metadata", required = false)
  public boolean skipMetadata = false;

  /** Do not export relations at all. */
  @Parameter(names = {"--skip-relations"}, description = "skip relations", required = false)
  public boolean skipRelations = false;

  /** Drop relations none of whose members intersect the bounding box. */
  @Parameter(names = {"-s", "--skip-large-relations"}, description = "skip non-intersecting relations; useful for small extracts where large relations are not needed; recommended option", required = false)
  public boolean skipLargeRelations = false;

  /** Skip the check that the bounding box lies inside the source's extract region. */
  @Parameter(names = {"--skip-bbox-check"}, description = "skip check that tests whether the requested bounding box lies fully in the input data set; use this option with care", required = false)
  public boolean skipExtractRegionCheck = false;

  /** Print usage and exit. */
  @Parameter(names = {"--help", "-h"}, help = true, order = 0)
  public boolean help = false;
}
}
\ No newline at end of file
-- Schema of an empty OSHDB H2 extract: metadata, the three translation tables
-- (key, keyvalue, role) and one grid table per OSM type. The extractor runs this
-- script against the target database before inserting any data.
-- The "-- 0 +/- SELECT COUNT(*) ..." lines are pre-existing comments kept as-is
-- (they look like row-count hints from an H2 script export; TODO confirm).

-- Administrative user; the extractor connects as "sa".
CREATE USER IF NOT EXISTS SA SALT '18cd08f3a6432f39' HASH 'eb7f76db6824c107afaad9daf4354e61842744ddd695ecc966fbc017fcf50232' ADMIN;

-- Free-form key/value metadata (e.g. "extract.region", "generator").
CREATE CACHED TABLE PUBLIC.METADATA(
KEY VARCHAR NOT NULL,
VALUE VARCHAR
);
ALTER TABLE PUBLIC.METADATA ADD CONSTRAINT PUBLIC.CONSTRAINT_1 PRIMARY KEY(KEY);
-- 0 +/- SELECT COUNT(*) FROM PUBLIC.METADATA;

-- Tag keys: numeric id -> key text, with an index for lookups by text.
CREATE CACHED TABLE PUBLIC.KEY(
ID INT NOT NULL,
TXT VARCHAR
);
ALTER TABLE PUBLIC.KEY ADD CONSTRAINT PUBLIC.CONSTRAINT_12 PRIMARY KEY(ID);
-- 0 +/- SELECT COUNT(*) FROM PUBLIC.KEY;
CREATE INDEX PUBLIC.INDEX_1 ON PUBLIC.KEY(TXT);

-- Tag values: (key id, value id) -> value text, with an index on (key id, text).
CREATE CACHED TABLE PUBLIC.KEYVALUE(
KEYID INT NOT NULL SELECTIVITY 5,
VALUEID INT NOT NULL SELECTIVITY 10,
TXT VARCHAR SELECTIVITY 96
);
ALTER TABLE PUBLIC.KEYVALUE ADD CONSTRAINT PUBLIC.CONSTRAINT_4 PRIMARY KEY(KEYID, VALUEID);
-- 0 +/- SELECT COUNT(*) FROM PUBLIC.KEYVALUE;
CREATE INDEX PUBLIC.INDEX_4 ON PUBLIC.KEYVALUE(KEYID, TXT);

-- Relation member roles: numeric id -> role text, with an index for lookups by text.
CREATE CACHED TABLE PUBLIC.ROLE(
ID INT NOT NULL,
TXT VARCHAR
);
ALTER TABLE PUBLIC.ROLE ADD CONSTRAINT PUBLIC.CONSTRAINT_2 PRIMARY KEY(ID);
-- 0 +/- SELECT COUNT(*) FROM PUBLIC.ROLE;
CREATE INDEX PUBLIC.INDEX_2 ON PUBLIC.ROLE(TXT);

-- Grid tables: one row per grid cell, identified by (zoom level, cell id).
-- The extractor stores the Java-serialized grid cell object in DATA.
CREATE CACHED TABLE PUBLIC.GRID_NODE(
LEVEL INT NOT NULL,
ID BIGINT NOT NULL,
DATA BLOB
);
ALTER TABLE PUBLIC.GRID_NODE ADD CONSTRAINT PUBLIC.CONSTRAINT_F PRIMARY KEY(LEVEL, ID);
-- 0 +/- SELECT COUNT(*) FROM PUBLIC.GRID_NODE;
CREATE CACHED TABLE PUBLIC.GRID_WAY(
LEVEL INT NOT NULL,
ID BIGINT NOT NULL,
DATA BLOB
);
ALTER TABLE PUBLIC.GRID_WAY ADD CONSTRAINT PUBLIC.CONSTRAINT_41 PRIMARY KEY(LEVEL, ID);
-- 0 +/- SELECT COUNT(*) FROM PUBLIC.GRID_WAY;
CREATE CACHED TABLE PUBLIC.GRID_RELATION(
LEVEL INT NOT NULL,
ID BIGINT NOT NULL,
DATA BLOB
);
ALTER TABLE PUBLIC.GRID_RELATION ADD CONSTRAINT PUBLIC.CONSTRAINT_8 PRIMARY KEY(LEVEL, ID);
-- 0 +/- SELECT COUNT(*) FROM PUBLIC.GRID_RELATION;
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment