Merge branch 'way_keys' into v3

This commit is contained in:
systemed
2024-01-10 18:01:27 +00:00
15 changed files with 555 additions and 75 deletions
+1
View File
@@ -96,6 +96,7 @@ file(GLOB tilemaker_src_files
src/shared_data.cpp
src/shp_mem_tiles.cpp
src/shp_processor.cpp
src/significant_tags.cpp
src/sorted_node_store.cpp
src/sorted_way_store.cpp
src/tag_map.cpp
+9
View File
@@ -110,6 +110,7 @@ tilemaker: \
src/shared_data.o \
src/shp_mem_tiles.o \
src/shp_processor.o \
src/significant_tags.o \
src/sorted_node_store.o \
src/sorted_way_store.o \
src/tag_map.o \
@@ -126,6 +127,7 @@ test: \
test_pbf_reader \
test_pooled_string \
test_relation_roles \
test_significant_tags \
test_sorted_node_store \
test_sorted_way_store
@@ -156,11 +158,18 @@ test_pooled_string: \
test/pooled_string.test.o
$(CXX) $(CXXFLAGS) -o test.pooled_string $^ $(INC) $(LIB) $(LDFLAGS) && ./test.pooled_string
test_relation_roles: \
src/relation_roles.o \
test/relation_roles.test.o
$(CXX) $(CXXFLAGS) -o test.relation_roles $^ $(INC) $(LIB) $(LDFLAGS) && ./test.relation_roles
test_significant_tags: \
src/significant_tags.o \
src/tag_map.o \
test/significant_tags.test.o
$(CXX) $(CXXFLAGS) -o test.significant_tags $^ $(INC) $(LIB) $(LDFLAGS) && ./test.significant_tags
test_sorted_node_store: \
src/external/streamvbyte_decode.o \
src/external/streamvbyte_encode.o \
+12 -9
View File
@@ -109,16 +109,19 @@ For example:
Your Lua file needs to supply a few things:
1. `node_keys`, a list of those OSM keys which indicate that a node should be processed
2. `node_function()`, a function to process an OSM node and add it to layers
3. `way_function()`, a function to process an OSM way and add it to layers
4. (optional) `init_function(name)`, a function to initialize Lua logic
5. (optional) `exit_function`, a function to finalize Lua logic (useful to show statistics)
6. (optional) `relation_scan_function`, a function to determine whether your Lua file wishes to process the given relation
7. (optional) `relation_function`, a function to process an OSM relation and add it to layers
8. (optional) `attribute_function`, a function to remap attributes from shapefiles
1. (optional) `node_keys`, a list of those OSM tags which indicate that a node should be processed
2. (optional) `way_keys`, a list of those OSM tags which indicate that a way should be processed
3. `node_function()`, a function to process an OSM node and add it to layers
4. `way_function()`, a function to process an OSM way and add it to layers
5. (optional) `init_function(name)`, a function to initialize Lua logic
6. (optional) `exit_function`, a function to finalize Lua logic (useful to show statistics)
7. (optional) `relation_scan_function`, a function to determine whether your Lua file wishes to process the given relation
8. (optional) `relation_function`, a function to process an OSM relation and add it to layers
9. (optional) `attribute_function`, a function to remap attributes from shapefiles
`node_keys` is a simple list (or in Lua parlance, a 'table') of OSM tag keys. If a node has one of those keys, it will be processed by `node_function`; if not, it'll be skipped. For example, if you wanted to show highway crossings and railway stations, it should be `{ "highway", "railway" }`. (This avoids the need to process the vast majority of nodes which contain no important tags at all.)
`node_keys` is a simple list (or in Lua parlance, a 'table') of OSM tags. If a node has one of those tags, it will be processed by `node_function`; if not, it'll be skipped. For example, if you wanted to show highway crossings and railway stations, it should be `{ "highway", "railway" }`. (This avoids the need to process the vast majority of nodes which contain no important tags at all.)
`way_keys` is similar to `node_keys`, but for ways. For ways, you may also wish to express the filter in terms of the tag value (`key=value`), or as an inversion (a leading `~`). For example, to exclude buildings: `way_keys = {"~building"}`. To build a map only of major roads: `way_keys = {"highway=motorway", "highway=trunk", "highway=primary", "highway=secondary"}`. Plain and inverted (`~`) filters cannot be mixed in the same list.
`node_function` and `way_function` work the same way. They are called with an OSM object; you then inspect the tags of that object, and put it in your vector tiles' layers based on those tags. In essence, the process is:
+5 -2
View File
@@ -19,6 +19,7 @@
#include <boost/container/flat_map.hpp>
class TagMap;
class SignificantTags;
// Lua
extern "C" {
@@ -73,6 +74,7 @@ public:
~OsmLuaProcessing();
// ---- Helpers provided for main routine
void handleUserSignal(int signum);
// Has this object been assigned to any layers?
bool empty();
@@ -94,7 +96,7 @@ public:
bool scanRelation(WayID id, const TagMap& tags);
/// \brief We are now processing a significant node
void setNode(NodeID id, LatpLon node, const TagMap& tags);
bool setNode(NodeID id, LatpLon node, const TagMap& tags);
/// \brief We are now processing a way
bool setWay(WayID wayId, LatpLonVec const &llVec, const TagMap& tags);
@@ -211,7 +213,8 @@ public:
void setVectorLayerMetadata(const uint_least8_t layer, const std::string &key, const uint type);
std::vector<std::string> GetSignificantNodeKeys();
SignificantTags GetSignificantNodeKeys();
SignificantTags GetSignificantWayKeys();
// ---- Cached geometries creation
+26 -1
View File
@@ -19,6 +19,21 @@ extern bool verbose;
class NodeStore;
class WayStore;
// Records which object IDs have been marked as "used", so that a later read
// phase can ask whether a given ID is needed.
//
// While the status is Disabled, test() reports every ID as used.
class UsedObjects {
public:
// Enabled: membership is tracked. Disabled: test() always answers true.
enum class Status: bool { Disabled = false, Enabled = true };
UsedObjects(Status status);
// Returns true if tracking is disabled, or if `id` was previously passed to set().
bool test(NodeID id);
// Marks `id` as used.
void set(NodeID id);
// Switches the status to Enabled.
void enable();
// Discards the recorded IDs.
void clear();
private:
Status status;
// Locks taken by set(); a chunk picks its lock by chunk index modulo the number of locks.
std::vector<std::mutex> mutex;
// One bitmap per chunk of 65536 consecutive IDs; a chunk's bitmap is sized on its first set().
std::vector<std::vector<bool>> ids;
};
// A comparator for data_view so it can be used in boost's flat_map
struct DataViewLessThan {
bool operator()(const protozero::data_view& a, const protozero::data_view& b) const {
@@ -206,8 +221,18 @@ protected:
UsedWays used_ways;
public:
UsedObjects usedNodes;
UsedObjects usedRelations;
OSMStore(NodeStore& nodes, WayStore& ways): nodes(nodes), ways(ways)
// Binds this store to the given node and way stores, sets the initial
// tracking status of usedNodes and usedRelations, then calls reopen().
OSMStore(NodeStore& nodes, WayStore& ways):
nodes(nodes),
ways(ways),
// We only track usedNodes if way_keys is present; a node is used if it's
// a member of a way used by a used relation, or a way that meets the way_keys
// criteria.
usedNodes(UsedObjects::Status::Disabled),
// A relation is used only if it was previously accepted from relation_scan_function
usedRelations(UsedObjects::Status::Enabled)
{
reopen();
}
+11 -5
View File
@@ -8,6 +8,7 @@
#include <mutex>
#include <map>
#include "osm_store.h"
#include "significant_tags.h"
#include "pbf_reader.h"
#include "tag_map.h"
#include <protozero/data_view.hpp>
@@ -44,7 +45,7 @@ struct IndexedBlockMetadata: BlockMetadata {
class PbfProcessor
{
public:
enum class ReadPhase { Nodes = 1, Ways = 2, Relations = 4, RelationScan = 8 };
enum class ReadPhase { Nodes = 1, Ways = 2, Relations = 4, RelationScan = 8, WayScan = 16 };
PbfProcessor(OSMStore &osmStore);
@@ -54,7 +55,8 @@ public:
int ReadPbfFile(
uint shards,
bool hasSortTypeThenID,
const std::unordered_set<std::string>& nodeKeys,
const SignificantTags& nodeKeys,
const SignificantTags& wayKeys,
unsigned int threadNum,
const pbfreader_generate_stream& generate_stream,
const pbfreader_generate_output& generate_output,
@@ -77,28 +79,32 @@ private:
std::istream &infile,
OsmLuaProcessing &output,
const BlockMetadata& blockMetadata,
const std::unordered_set<std::string>& nodeKeys,
const SignificantTags& nodeKeys,
const SignificantTags& wayKeys,
bool locationsOnWays,
ReadPhase phase,
uint shard,
uint effectiveShard
);
bool ReadNodes(OsmLuaProcessing& output, PbfReader::PrimitiveGroup& pg, const PbfReader::PrimitiveBlock& pb, const std::unordered_set<int>& nodeKeyPositions);
bool ReadNodes(OsmLuaProcessing& output, PbfReader::PrimitiveGroup& pg, const PbfReader::PrimitiveBlock& pb, const SignificantTags& nodeKeys);
bool ReadWays(
OsmLuaProcessing& output,
PbfReader::PrimitiveGroup& pg,
const PbfReader::PrimitiveBlock& pb,
const SignificantTags& wayKeys,
bool locationsOnWays,
uint shard,
uint effectiveShards
);
bool ScanRelations(OsmLuaProcessing& output, PbfReader::PrimitiveGroup& pg, const PbfReader::PrimitiveBlock& pb);
bool ScanWays(OsmLuaProcessing& output, PbfReader::PrimitiveGroup& pg, const PbfReader::PrimitiveBlock& pb, const SignificantTags& wayKeys);
bool ScanRelations(OsmLuaProcessing& output, PbfReader::PrimitiveGroup& pg, const PbfReader::PrimitiveBlock& pb, const SignificantTags& wayKeys);
bool ReadRelations(
OsmLuaProcessing& output,
PbfReader::PrimitiveGroup& pg,
const PbfReader::PrimitiveBlock& pb,
const BlockMetadata& blockMetadata,
const SignificantTags& wayKeys,
uint shard,
uint effectiveShards
);
+39
View File
@@ -0,0 +1,39 @@
#ifndef SIGNIFICANT_TAGS_H
#define SIGNIFICANT_TAGS_H
#include <string>
#include <vector>
class TagMap;
// Data structures to permit users to express filters on which nodes/ways
// to be accepted.
//
// Filters are of the shape: [~]key-name[=value-name]
//
// When a tilde is present, the filter's meaning is inverted.
// One parsed filter, as produced by SignificantTags::parseFilter.
struct TagFilter {
// false when the raw filter started with '~', true otherwise.
bool accept;
// Tag key the filter applies to.
std::string key;
// Tag value the filter applies to; left empty when the raw filter had no '=',
// in which case any value of `key` matches.
std::string value;
// Filters compare equal when accept, key and value are all equal.
bool operator==(const TagFilter& other) const {
return accept == other.accept && key == other.key && value == other.value;
}
};
// A set of tag filters that decides whether an object's tags make it worth processing.
class SignificantTags {
public:
// Creates a disabled filter set: filter() accepts everything.
SignificantTags();
// Creates an enabled filter set from raw filter strings.
// Throws std::runtime_error if accept and reject ('~') filters are mixed.
SignificantTags(std::vector<std::string> rawTags);
// Returns true if an object carrying `tags` passes the filters.
bool filter(const TagMap& tags) const;
// Parses one raw filter of the shape [~]key-name[=value-name].
static TagFilter parseFilter(std::string rawTag);
// Returns true if this object was constructed from a list of filters.
bool enabled() const;
private:
bool enabled_;
std::vector<TagFilter> filters;
};
#endif
+19
View File
@@ -23,11 +23,17 @@
// This is true since the strings are owned by the protobuf block reader
// 3. Max number of tag values will fit in a short
// OSM limit is 5,000 tags per object
// A key/value pair of string views; this is what dereferencing a
// TagMap::Iterator yields.
struct Tag {
protozero::data_view key;
protozero::data_view value;
};
class TagMap {
public:
TagMap();
void reset();
bool empty();
void addTag(const protozero::data_view& key, const protozero::data_view& value);
// Return -1 if key not found, else return its keyLoc.
@@ -41,6 +47,19 @@ public:
boost::container::flat_map<std::string, std::string> exportToBoostMap() const;
// Forward iterator over the tags held by a TagMap, sufficient for range-for.
struct Iterator {
// The map being iterated.
const TagMap& map;
// Index of the current shard of keys.
size_t shard = 0;
// Position within the current shard.
size_t offset = 0;
// True when the two iterators differ in shard or offset.
bool operator!=(const Iterator& other) const;
// Advances to the next tag, skipping empty shards.
void operator++();
// Returns the key and value at the current position.
Tag operator*() const;
};
Iterator begin() const;
Iterator end() const;
private:
uint32_t ensureString(
std::vector<std::vector<const protozero::data_view*>>& vector,
+51 -5
View File
@@ -5,9 +5,11 @@
#include "helpers.h"
#include "coordinates_geom.h"
#include "osm_mem_tiles.h"
#include "significant_tags.h"
#include "tag_map.h"
#include "node_store.h"
#include "polylabel.h"
#include <signal.h>
using namespace std;
@@ -15,6 +17,26 @@ const std::string EMPTY_STRING = "";
thread_local kaguya::State *g_luaState = nullptr;
thread_local OsmLuaProcessing* osmLuaProcessing = nullptr;
// Signal handler: forwards the signal to the OsmLuaProcessing instance that
// belongs to the thread on which the signal was delivered.
//
// `osmLuaProcessing` is thread_local and starts out as nullptr, so a thread that
// has not set it has nothing to report; in that case the signal is ignored
// rather than dereferencing a null pointer.
//
// NOTE(review): the forwarded call writes to std::cout, which is not
// async-signal-safe; this is a best-effort debugging aid.
void handleOsmLuaProcessingUserSignal(int signum) {
	if (osmLuaProcessing == nullptr)
		return;

	osmLuaProcessing->handleUserSignal(signum);
}
// Installs handleOsmLuaProcessingUserSignal as the SIGUSR1 handler when an
// instance is constructed. On Windows (_WIN32 defined) construction does nothing.
class Sigusr1Handler {
public:
Sigusr1Handler() {
#ifndef _WIN32
signal(SIGUSR1, handleOsmLuaProcessingUserSignal);
#endif
}
// Deliberately empty; calling it references the object so that it gets constructed.
void initialize() {
// No-op just to ensure the compiler doesn't optimize away
// the handler.
}
};
thread_local Sigusr1Handler sigusr1Handler;
// A key in `currentTags`. If Lua code refers to an absent key,
// found will be false.
struct KnownTagKey {
@@ -158,6 +180,8 @@ OsmLuaProcessing::OsmLuaProcessing(
layers(layers),
materializeGeometries(materializeGeometries) {
sigusr1Handler.initialize();
// ---- Initialise Lua
g_luaState = &luaState;
luaState.setErrorHandler(lua_error_handler);
@@ -213,6 +237,10 @@ OsmLuaProcessing::~OsmLuaProcessing() {
luaState("if exit_function~=nil then exit_function() end");
}
// Prints the ID of the OSM object currently being processed to stdout.
// `signum` is not used.
void OsmLuaProcessing::handleUserSignal(int signum) {
std::cout << "processing OSM ID " << originalOsmID << std::endl;
}
// ---- Helpers provided for main routine
// Has this object been assigned to any layers?
@@ -871,8 +899,7 @@ bool OsmLuaProcessing::scanRelation(WayID id, const TagMap& tags) {
return true;
}
void OsmLuaProcessing::setNode(NodeID id, LatpLon node, const TagMap& tags) {
bool OsmLuaProcessing::setNode(NodeID id, LatpLon node, const TagMap& tags) {
reset();
originalOsmID = id;
isWay = false;
@@ -899,7 +926,11 @@ void OsmLuaProcessing::setNode(NodeID id, LatpLon node, const TagMap& tags) {
for (auto &output : finalizeOutputs()) {
osmMemTiles.addObjectToSmallIndex(index, output, originalOsmID);
}
}
return true;
}
return false;
}
// We are now processing a way
@@ -995,10 +1026,25 @@ void OsmLuaProcessing::setRelation(
}
}
vector<string> OsmLuaProcessing::GetSignificantNodeKeys() {
return luaState["node_keys"];
// Builds the node filter from the Lua global `node_keys`.
// When the script does not define `node_keys`, a disabled (accept-all) filter is returned.
SignificantTags OsmLuaProcessing::GetSignificantNodeKeys() {
	if (!luaState["node_keys"])
		return SignificantTags();

	std::vector<string> rawFilters = luaState["node_keys"];
	return SignificantTags(rawFilters);
}
// Builds the way filter from the Lua global `way_keys`.
// When the script does not define `way_keys`, a disabled (accept-all) filter is returned.
SignificantTags OsmLuaProcessing::GetSignificantWayKeys() {
	if (!luaState["way_keys"])
		return SignificantTags();

	std::vector<string> rawFilters = luaState["way_keys"];
	return SignificantTags(rawFilters);
}
std::vector<OutputObject> OsmLuaProcessing::finalizeOutputs() {
std::vector<OutputObject> list;
list.reserve(this->outputs.size());
+34
View File
@@ -17,6 +17,40 @@ static inline bool isClosed(const std::vector<LatpLon>& way) {
return way.begin() == way.end();
}
// Creates the tracker with 256 mutexes and 256 * 1024 (initially empty) chunks.
// test() and set() place 65536 IDs in each chunk, so IDs below 2^34 are addressable.
UsedObjects::UsedObjects(Status status): status(status), mutex(256), ids(256 * 1024) {
}
// Reports whether `id` should be treated as used.
//
// Returns true unconditionally while tracking is disabled. Otherwise returns
// true only if `id` was previously passed to set().
bool UsedObjects::test(NodeID id) {
	if (status == Status::Disabled)
		return true;

	const size_t chunk = id / 65536;

	// An ID beyond the chunk table (or any ID once the table has been
	// emptied) cannot have been set; answer "not used" instead of indexing
	// out of bounds.
	if (chunk >= ids.size())
		return false;

	const std::vector<bool>& bits = ids[chunk];

	// A chunk's bitmap is only sized on its first set(), so an empty one has no used IDs.
	if (bits.empty())
		return false;

	return bits[id % 65536];
}
// Turns tracking on: from now on test() consults the recorded IDs
// instead of answering true for everything.
void UsedObjects::enable() {
status = Status::Enabled;
}
// Marks `id` as used.
//
// The ID space is split into chunks of 65536 IDs. The chunk's bitmap is sized
// on first use, under the mutex selected by the chunk index.
void UsedObjects::set(NodeID id) {
	const size_t chunkIndex = id / 65536;

	std::lock_guard<std::mutex> guard(mutex[chunkIndex % mutex.size()]);

	std::vector<bool>& bits = ids[chunkIndex];
	if (bits.empty())
		bits.resize(65536);

	bits[id % 65536] = true;
}
// Frees the recorded IDs.
//
// This data is not needed after PbfProcessor's ReadPhase::Nodes has completed,
// and it takes up to ~1.5GB of RAM.
//
// Only the per-chunk bitmaps are released; the outer chunk table keeps its
// size so that a later set() or test() (for example when another PBF is read)
// still indexes a valid, empty chunk instead of an empty table.
void UsedObjects::clear() {
	for (std::vector<bool>& chunk : ids) {
		// Swapping with a temporary releases the chunk's storage,
		// which a plain clear() is not required to do.
		std::vector<bool>().swap(chunk);
	}
}
void OSMStore::open(std::string const &osm_store_filename)
{
void_mmap_allocator::openMmapFile(osm_store_filename);
+93 -44
View File
@@ -24,7 +24,7 @@ PbfProcessor::PbfProcessor(OSMStore &osmStore)
: osmStore(osmStore)
{ }
bool PbfProcessor::ReadNodes(OsmLuaProcessing& output, PbfReader::PrimitiveGroup& pg, const PbfReader::PrimitiveBlock& pb, const unordered_set<int>& nodeKeyPositions)
bool PbfProcessor::ReadNodes(OsmLuaProcessing& output, PbfReader::PrimitiveGroup& pg, const PbfReader::PrimitiveBlock& pb, const SignificantTags& nodeKeys)
{
// ---- Read nodes
std::vector<NodeStore::element_t> nodes;
@@ -35,30 +35,24 @@ bool PbfProcessor::ReadNodes(OsmLuaProcessing& output, PbfReader::PrimitiveGroup
NodeID nodeId = node.id;
LatpLon latplon = { int(lat2latp(double(node.lat)/10000000.0)*10000000.0), node.lon };
bool significant = false;
for (int i = node.tagStart; i < node.tagEnd; i += 2) {
auto keyIndex = pg.translateNodeKeyValue(i);
tags.reset();
// For tagged nodes, call Lua, then save the OutputObject
for (int n = node.tagStart; n < node.tagEnd; n += 2) {
auto keyIndex = pg.translateNodeKeyValue(n);
auto valueIndex = pg.translateNodeKeyValue(n + 1);
if (nodeKeyPositions.find(keyIndex) != nodeKeyPositions.end()) {
significant = true;
}
const protozero::data_view& key = pb.stringTable[keyIndex];
const protozero::data_view& value = pb.stringTable[valueIndex];
tags.addTag(key, value);
}
nodes.push_back(std::make_pair(static_cast<NodeID>(nodeId), latplon));
bool emitted = false;
if (!tags.empty() && nodeKeys.filter(tags)) {
emitted = output.setNode(static_cast<NodeID>(nodeId), latplon, tags);
}
if (significant) {
tags.reset();
// For tagged nodes, call Lua, then save the OutputObject
for (int n = node.tagStart; n < node.tagEnd; n += 2) {
auto keyIndex = pg.translateNodeKeyValue(n);
auto valueIndex = pg.translateNodeKeyValue(n + 1);
const protozero::data_view& key = pb.stringTable[keyIndex];
const protozero::data_view& value = pb.stringTable[valueIndex];
tags.addTag(key, value);
}
output.setNode(static_cast<NodeID>(nodeId), latplon, tags);
}
if (emitted || osmStore.usedNodes.test(nodeId))
nodes.push_back(std::make_pair(static_cast<NodeID>(nodeId), latplon));
}
if (nodes.size() > 0) {
@@ -72,6 +66,7 @@ bool PbfProcessor::ReadWays(
OsmLuaProcessing &output,
PbfReader::PrimitiveGroup& pg,
const PbfReader::PrimitiveBlock& pb,
const SignificantTags& wayKeys,
bool locationsOnWays,
uint shard,
uint effectiveShards
@@ -89,6 +84,12 @@ bool PbfProcessor::ReadWays(
std::vector<NodeID> nodeVec;
for (PbfReader::Way pbfWay : pg.ways()) {
tags.reset();
readTags(pbfWay, pb, tags);
if (!osmStore.way_is_used(pbfWay.id) && !wayKeys.filter(tags))
continue;
llVec.clear();
nodeVec.clear();
@@ -132,8 +133,6 @@ bool PbfProcessor::ReadWays(
if (llVec.empty()) continue;
try {
tags.reset();
readTags(pbfWay, pb, tags);
bool emitted = output.setWay(static_cast<WayID>(pbfWay.id), llVec, tags);
// If we need it for later, store the way's coordinates in the global way store
@@ -160,7 +159,32 @@ bool PbfProcessor::ReadWays(
return true;
}
bool PbfProcessor::ScanRelations(OsmLuaProcessing& output, PbfReader::PrimitiveGroup& pg, const PbfReader::PrimitiveBlock& pb) {
// Pre-pass over the ways of one primitive group: records, in
// osmStore.usedNodes, every node referenced by a way that will be kept.
//
// This phase only runs if the Lua script has declared a `way_keys` variable.
//
// Unlike ScanRelations, Lua is never called here; the decision is made purely
// from the way's tags (via `wayKeys`) or from the way already being marked as used.
//
// Returns false when the group holds no ways, true otherwise.
bool PbfProcessor::ScanWays(OsmLuaProcessing& output, PbfReader::PrimitiveGroup& pg, const PbfReader::PrimitiveBlock& pb, const SignificantTags& wayKeys) {
	if (pg.ways().empty())
		return false;

	TagMap wayTags;

	for (auto& way : pg.ways()) {
		wayTags.reset();
		readTags(way, pb, wayTags);

		const bool keepWay = osmStore.way_is_used(way.id) || wayKeys.filter(wayTags);
		if (!keepWay)
			continue;

		for (const auto nodeId : way.refs)
			osmStore.usedNodes.set(nodeId);
	}

	return true;
}
bool PbfProcessor::ScanRelations(OsmLuaProcessing& output, PbfReader::PrimitiveGroup& pg, const PbfReader::PrimitiveBlock& pb, const SignificantTags& wayKeys) {
// Scan relations to see which ways we need to save
if (pg.relations().empty())
return false;
@@ -174,14 +198,20 @@ bool PbfProcessor::ScanRelations(OsmLuaProcessing& output, PbfReader::PrimitiveG
bool isMultiPolygon = relationIsType(pbfRelation, typeKey, mpKey);
bool isAccepted = false;
WayID relid = static_cast<WayID>(pbfRelation.id);
tags.reset();
readTags(pbfRelation, pb, tags);
if (!isMultiPolygon) {
if (output.canReadRelations()) {
tags.reset();
readTags(pbfRelation, pb, tags);
isAccepted = output.scanRelation(relid, tags);
}
if (!isAccepted) continue;
} else {
if (!wayKeys.filter(tags))
continue;
}
osmStore.usedRelations.set(relid);
for (int n=0; n < pbfRelation.memids.size(); n++) {
uint64_t lastID = pbfRelation.memids[n];
@@ -210,6 +240,7 @@ bool PbfProcessor::ReadRelations(
PbfReader::PrimitiveGroup& pg,
const PbfReader::PrimitiveBlock& pb,
const BlockMetadata& blockMetadata,
const SignificantTags& wayKeys,
uint shard,
uint effectiveShards
) {
@@ -263,7 +294,8 @@ bool PbfProcessor::ReadRelations(
try {
tags.reset();
readTags(pbfRelation, pb, tags);
output.setRelation(pb.stringTable, pbfRelation, outerWayVec, innerWayVec, tags, isMultiPolygon, isInnerOuter);
if (osmStore.usedRelations.test(pbfRelation.id) || wayKeys.filter(tags))
output.setRelation(pb.stringTable, pbfRelation, outerWayVec, innerWayVec, tags, isMultiPolygon, isInnerOuter);
} catch (std::out_of_range &err) {
// Relation is missing a member?
@@ -281,7 +313,8 @@ bool PbfProcessor::ReadBlock(
std::istream& infile,
OsmLuaProcessing& output,
const BlockMetadata& blockMetadata,
const unordered_set<string>& nodeKeys,
const SignificantTags& nodeKeys,
const SignificantTags& wayKeys,
bool locationsOnWays,
ReadPhase phase,
uint shard,
@@ -299,14 +332,6 @@ bool PbfProcessor::ReadBlock(
// Keep count of groups read during this phase.
std::size_t read_groups = 0;
// Read the string table, and pre-calculate the positions of valid node keys
unordered_set<int> nodeKeyPositions;
for (auto it : nodeKeys) {
//nodeKeyPositions.insert(findStringPosition(pb, it));
auto rv = findStringPosition(pb, it);
nodeKeyPositions.insert(rv);
}
int primitiveGroupSize = 0;
for (auto& pg : pb.groups()) {
primitiveGroupSize++;
@@ -329,7 +354,7 @@ bool PbfProcessor::ReadBlock(
};
if(phase == ReadPhase::Nodes) {
bool done = ReadNodes(output, pg, pb, nodeKeyPositions);
bool done = ReadNodes(output, pg, pb, nodeKeys);
if(done) {
output_progress();
++read_groups;
@@ -337,9 +362,21 @@ bool PbfProcessor::ReadBlock(
}
}
if(phase == ReadPhase::WayScan) {
bool done = ScanWays(output, pg, pb, wayKeys);
if(done) {
if (ioMutex.try_lock()) {
std::cout << "\r(Scanning for nodes used in ways: " << (100*blocksProcessed.load()/blocksToProcess.load()) << "%) ";
std::cout.flush();
ioMutex.unlock();
}
continue;
}
}
if(phase == ReadPhase::RelationScan) {
osmStore.ensureUsedWaysInited();
bool done = ScanRelations(output, pg, pb);
bool done = ScanRelations(output, pg, pb, wayKeys);
if(done) {
if (ioMutex.try_lock()) {
std::cout << "\r(Scanning for ways used in relations: " << (100*blocksProcessed.load()/blocksToProcess.load()) << "%) ";
@@ -351,7 +388,7 @@ bool PbfProcessor::ReadBlock(
}
if(phase == ReadPhase::Ways) {
bool done = ReadWays(output, pg, pb, locationsOnWays, shard, effectiveShards);
bool done = ReadWays(output, pg, pb, wayKeys, locationsOnWays, shard, effectiveShards);
if(done) {
output_progress();
++read_groups;
@@ -360,7 +397,7 @@ bool PbfProcessor::ReadBlock(
}
if(phase == ReadPhase::Relations) {
bool done = ReadRelations(output, pg, pb, blockMetadata, shard, effectiveShards);
bool done = ReadRelations(output, pg, pb, blockMetadata, wayKeys, shard, effectiveShards);
if(done) {
output_progress();
++read_groups;
@@ -412,7 +449,8 @@ bool blockHasPrimitiveGroupSatisfying(
int PbfProcessor::ReadPbfFile(
uint shards,
bool hasSortTypeThenID,
unordered_set<string> const& nodeKeys,
const SignificantTags& nodeKeys,
const SignificantTags& wayKeys,
unsigned int threadNum,
const pbfreader_generate_stream& generate_stream,
const pbfreader_generate_output& generate_output,
@@ -511,7 +549,16 @@ int PbfProcessor::ReadPbfFile(
}
std::vector<ReadPhase> all_phases = { ReadPhase::RelationScan, ReadPhase::Nodes, ReadPhase::Ways, ReadPhase::Relations };
std::vector<ReadPhase> all_phases = { ReadPhase::RelationScan };
if (wayKeys.enabled()) {
osmStore.usedNodes.enable();
all_phases.push_back(ReadPhase::WayScan);
}
all_phases.push_back(ReadPhase::Nodes);
all_phases.push_back(ReadPhase::Ways);
all_phases.push_back(ReadPhase::Relations);
for(auto phase: all_phases) {
uint effectiveShards = 1;
@@ -561,6 +608,7 @@ int PbfProcessor::ReadPbfFile(
for (const auto& entry : blocks) {
if ((phase == ReadPhase::Nodes && entry.second.hasNodes) ||
(phase == ReadPhase::RelationScan && entry.second.hasRelations) ||
(phase == ReadPhase::WayScan && entry.second.hasWays) ||
(phase == ReadPhase::Ways && entry.second.hasWays) ||
(phase == ReadPhase::Relations && entry.second.hasRelations))
filteredBlocks[entry.first] = entry.second;
@@ -597,7 +645,7 @@ int PbfProcessor::ReadPbfFile(
{
for(const std::vector<IndexedBlockMetadata>& blockRange: blockRanges) {
boost::asio::post(pool, [=, &blockRange, &blocks, &block_mutex, &nodeKeys]() {
boost::asio::post(pool, [=, &blockRange, &blocks, &block_mutex, &nodeKeys, &wayKeys]() {
if (phase == ReadPhase::Nodes)
osmStore.nodes.batchStart();
if (phase == ReadPhase::Ways)
@@ -607,7 +655,7 @@ int PbfProcessor::ReadPbfFile(
auto infile = generate_stream();
auto output = generate_output();
if(ReadBlock(*infile, *output, indexedBlockMetadata, nodeKeys, locationsOnWays, phase, shard, effectiveShards)) {
if(ReadBlock(*infile, *output, indexedBlockMetadata, nodeKeys, wayKeys, locationsOnWays, phase, shard, effectiveShards)) {
const std::lock_guard<std::mutex> lock(block_mutex);
blocks.erase(indexedBlockMetadata.index);
}
@@ -628,6 +676,7 @@ int PbfProcessor::ReadPbfFile(
if(phase == ReadPhase::Nodes) {
osmStore.nodes.finalize(threadNum);
osmStore.usedNodes.clear();
}
if(phase == ReadPhase::Ways) {
osmStore.ways.finalize(threadNum);
+88
View File
@@ -0,0 +1,88 @@
#include <stdexcept>
#include "significant_tags.h"
#include "tag_map.h"
// Parses one raw filter of the shape [~]key-name[=value-name].
//
// A leading '~' clears `accept`. Everything before the first '=' becomes the
// key and everything after it the value; with no '=', the whole remainder is
// the key and the value stays empty.
TagFilter SignificantTags::parseFilter(std::string rawTag) {
	TagFilter parsed { true };

	const bool inverted = rawTag.size() > 0 && rawTag[0] == '~';
	if (inverted)
		parsed.accept = false;

	const std::string body = inverted ? rawTag.substr(1) : rawTag;

	const size_t separator = body.find("=");
	if (separator == std::string::npos) {
		parsed.key = body;
	} else {
		parsed.key = body.substr(0, separator);
		parsed.value = body.substr(separator + 1);
	}

	return parsed;
}
// Creates a disabled filter set; filter() returns true for every TagMap.
SignificantTags::SignificantTags(): enabled_(false) {}
// Creates an enabled filter set by parsing each entry of `rawTags`.
//
// All filters must agree on whether they accept or reject.
// Throws std::runtime_error naming the first filter and the first one that
// disagrees with it.
SignificantTags::SignificantTags(std::vector<std::string> rawTags): enabled_(true) {
	for (size_t i = 0; i < rawTags.size(); i++)
		filters.push_back(parseFilter(rawTags[i]));

	if (filters.empty())
		return;

	// The first filter decides the mode; every other filter must match it.
	const bool expectedAccept = filters[0].accept;
	for (size_t i = 0; i < filters.size(); i++) {
		if (filters[i].accept != expectedAccept)
			throw std::runtime_error("cannot mix reject and accept filters: " + rawTags[0] + ", " + rawTags[i]);
	}
}
// True when this object was built from a list of filters (even an empty one).
bool SignificantTags::enabled() const { return enabled_; }
// Decides whether an object carrying `tags` passes the filters.
//
//  - Disabled filter set: everything passes.
//  - Enabled with no filters: nothing passes.
//  - Accept filters: passes if at least one tag is matched by some filter.
//  - Reject ('~') filters: passes if at least one tag is matched by no filter.
//
// A filter matches a tag when the keys are equal and the filter's value is
// either empty or equal to the tag's value.
bool SignificantTags::filter(const TagMap& tags) const {
	if (!enabled_)
		return true;

	if (filters.empty())
		return false;

	// The constructor guarantees all filters share the same mode.
	const bool acceptMode = filters[0].accept;

	for (const Tag& tag : tags) {
		bool matched = false;
		for (const TagFilter& candidate : filters) {
			if (candidate.key == tag.key && (candidate.value.empty() || candidate.value == tag.value)) {
				matched = true;
				break;
			}
		}

		// Accept mode wants a matched tag; reject mode wants an unmatched one.
		if (matched == acceptMode)
			return true;
	}

	return false;
}
+42 -4
View File
@@ -16,6 +16,13 @@ void TagMap::reset() {
}
}
// Returns true when no shard of `keys` holds any entry.
bool TagMap::empty() {
	for (const auto& shard : keys) {
		if (shard.size() > 0)
			return false;
	}
	return true;
}
const std::size_t hashString(const std::string& str) {
// This is a pretty crappy hash function in terms of bit
// avalanching and distribution of output values.
@@ -60,16 +67,12 @@ uint32_t TagMap::ensureString(
void TagMap::addTag(const protozero::data_view& key, const protozero::data_view& value) {
uint32_t valueLoc = ensureString(values, value);
// std::cout << "valueLoc = " << valueLoc << std::endl;
uint32_t keyLoc = ensureString(keys, key);
// std::cout << "keyLoc = " << keyLoc << std::endl;
const uint16_t shard = keyLoc >> 16;
const uint16_t pos = keyLoc;
// std::cout << "shard=" << shard << ", pos=" << pos << std::endl;
if (key2value[shard].size() <= pos) {
// std::cout << "growing shard" << std::endl;
key2value[shard].resize(pos + 1);
}
@@ -133,3 +136,38 @@ boost::container::flat_map<std::string, std::string> TagMap::exportToBoostMap()
return rv;
}
// Returns an iterator at offset 0 of the first non-empty shard, or an
// iterator equal to end() when every shard is empty.
TagMap::Iterator TagMap::begin() const {
	size_t firstShard = 0;
	for (; firstShard < keys.size(); firstShard++) {
		if (keys[firstShard].size() != 0)
			break;
	}

	return Iterator{*this, firstShard, 0};
}
// Returns the past-the-end iterator: shard index equal to the shard count, offset 0.
TagMap::Iterator TagMap::end() const {
return Iterator{*this, keys.size(), 0};
}
// Iterators are considered equal when both shard and offset agree; the
// referenced map is not compared.
bool TagMap::Iterator::operator!=(const Iterator& other) const {
	const bool samePosition = (shard == other.shard) && (offset == other.offset);
	return !samePosition;
}
// Moves to the next tag. When the current shard is exhausted, jumps to
// offset 0 of the next non-empty shard, or to the end position if there is none.
void TagMap::Iterator::operator++() {
	++offset;
	if (offset < map.keys[shard].size())
		return;

	offset = 0;
	// Step past the finished shard and any empty shards that follow it.
	do {
		shard++;
	} while (shard < map.keys.size() && map.keys[shard].size() == 0);
}
// Returns the key at the current position together with the value that
// key2value associates with it.
Tag TagMap::Iterator::operator*() const {
	const uint32_t valueLoc = map.key2value[shard][offset];
	const auto& keyView = *map.keys[shard][offset];
	const auto& valueView = *map.getValue(valueLoc);

	return Tag{ keyView, valueView };
}
+6 -5
View File
@@ -42,6 +42,7 @@
#include "helpers.h"
#include "coordinates.h"
#include "coordinates_geom.h"
#include "significant_tags.h"
#include "attribute_store.h"
#include "output_object.h"
@@ -255,10 +256,9 @@ int main(const int argc, const char* argv[]) {
}
shpMemTiles.reportSize();
// ---- Read significant node tags
vector<string> nodeKeyVec = osmLuaProcessing.GetSignificantNodeKeys();
unordered_set<string> nodeKeys(nodeKeyVec.begin(), nodeKeyVec.end());
// ---- Read significant node/way tags
const SignificantTags significantNodeTags = osmLuaProcessing.GetSignificantNodeKeys();
const SignificantTags significantWayTags = osmLuaProcessing.GetSignificantWayKeys();
// ---- Read all PBFs
@@ -274,7 +274,8 @@ int main(const int argc, const char* argv[]) {
int ret = pbfProcessor.ReadPbfFile(
nodeStore->shards(),
hasSortTypeThenID,
nodeKeys,
significantNodeTags,
significantWayTags,
options.threadNum,
[&]() {
thread_local std::shared_ptr<ifstream> pbfStream(new ifstream(inputFile, ios::in | ios::binary));
+119
View File
@@ -0,0 +1,119 @@
#include <iostream>
#include "external/minunit.h"
#include "significant_tags.h"
#include "tag_map.h"
// parseFilter: covers the four filter shapes (key, ~key, key=value, ~key=value).
MU_TEST(test_parse_filter) {
{
// Bare key: accept filter with an empty value.
TagFilter expected{true, "foo", ""};
mu_check(SignificantTags::parseFilter("foo") == expected);
}
{
// Leading tilde: reject filter.
TagFilter expected{false, "foo", ""};
mu_check(SignificantTags::parseFilter("~foo") == expected);
}
{
// key=value: accept filter with a value.
TagFilter expected{true, "foo", "bar"};
mu_check(SignificantTags::parseFilter("foo=bar") == expected);
}
{
// ~key=value: reject filter with a value.
TagFilter expected{false, "foo", "bar"};
mu_check(SignificantTags::parseFilter("~foo=bar") == expected);
}
}
// Constructing SignificantTags from a mix of accept and reject filters must throw.
MU_TEST(test_invalid_significant_tags) {
bool threw = false;
try {
// Filters must be all accept, or all reject, not a mix.
SignificantTags tags({"a", "~b"});
} catch (...) {
// Any exception counts; the test only checks that construction failed.
threw = true;
}
mu_check(threw);
}
// SignificantTags::filter: covers the disabled, empty-list, reject-filter and
// accept-filter cases.
MU_TEST(test_significant_tags) {
const std::string building = "building";
const std::string yes = "yes";
const std::string name = "name";
const std::string nameValue = "Some name";
const std::string power = "power";
const std::string tower = "tower";
// If created with no list, it's not enabled and all things pass filter.
// This is the case when people omit `node_keys` or `way_keys`.
{
SignificantTags tags;
TagMap map;
mu_check(tags.filter(map));
}
// If created with empty list, it rejects all things.
// This is the case when people write `way_keys = {}`, e.g. when creating
// an extract that only parses nodes.
{
std::vector<std::string> empty;
SignificantTags tags(empty);
TagMap map;
mu_check(!tags.filter(map));
}
// If created in default-accept mode, it accepts anything with an unmatched tag.
// This is the case when people write `way_keys = {"~building"}`
{
std::vector<std::string> defaultAccept{"~building"};
SignificantTags tags(defaultAccept);
{
// The only tag is matched by the reject filter, so nothing is left to accept.
TagMap map;
map.addTag(building, yes);
mu_check(!tags.filter(map));
}
{
// The extra `name` tag is not matched by any filter, so the object passes.
TagMap map;
map.addTag(building, yes);
map.addTag(name, nameValue);
mu_check(tags.filter(map));
}
}
// If created in default-reject mode, it accepts anything with a matched tag.
// This is the case when people write `way_keys = {"power=tower"}`
{
std::vector<std::string> defaultReject{"power=tower"};
SignificantTags tags(defaultReject);
{
// No tags at all: nothing can match.
TagMap map;
mu_check(!tags.filter(map));
}
{
// Key and value both match the filter.
TagMap map;
map.addTag(power, tower);
mu_check(tags.filter(map));
}
}
}
// Groups the three SignificantTags tests into a single minunit suite.
MU_TEST_SUITE(test_suite_significant_tags) {
MU_RUN_TEST(test_parse_filter);
MU_RUN_TEST(test_significant_tags);
MU_RUN_TEST(test_invalid_significant_tags);
}
// Test entry point: runs the suite, prints the minunit report and returns
// minunit's exit code.
int main() {
MU_RUN_SUITE(test_suite_significant_tags);
MU_REPORT();
return MU_EXIT_CODE;
}