diff --git a/CMakeLists.txt b/CMakeLists.txt index fe0e049..3ff4b32 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -96,6 +96,7 @@ file(GLOB tilemaker_src_files src/shared_data.cpp src/shp_mem_tiles.cpp src/shp_processor.cpp + src/significant_tags.cpp src/sorted_node_store.cpp src/sorted_way_store.cpp src/tag_map.cpp diff --git a/Makefile b/Makefile index 54dcd11..d4f3ea8 100644 --- a/Makefile +++ b/Makefile @@ -110,6 +110,7 @@ tilemaker: \ src/shared_data.o \ src/shp_mem_tiles.o \ src/shp_processor.o \ + src/significant_tags.o \ src/sorted_node_store.o \ src/sorted_way_store.o \ src/tag_map.o \ @@ -126,6 +127,7 @@ test: \ test_pbf_reader \ test_pooled_string \ test_relation_roles \ + test_significant_tags \ test_sorted_node_store \ test_sorted_way_store @@ -156,11 +158,18 @@ test_pooled_string: \ test/pooled_string.test.o $(CXX) $(CXXFLAGS) -o test.pooled_string $^ $(INC) $(LIB) $(LDFLAGS) && ./test.pooled_string + test_relation_roles: \ src/relation_roles.o \ test/relation_roles.test.o $(CXX) $(CXXFLAGS) -o test.relation_roles $^ $(INC) $(LIB) $(LDFLAGS) && ./test.relation_roles +test_significant_tags: \ + src/significant_tags.o \ + src/tag_map.o \ + test/significant_tags.test.o + $(CXX) $(CXXFLAGS) -o test.significant_tags $^ $(INC) $(LIB) $(LDFLAGS) && ./test.significant_tags + test_sorted_node_store: \ src/external/streamvbyte_decode.o \ src/external/streamvbyte_encode.o \ diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index d605d15..f81b5f5 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -109,16 +109,19 @@ For example: Your Lua file needs to supply a few things: -1. `node_keys`, a list of those OSM keys which indicate that a node should be processed -2. `node_function()`, a function to process an OSM node and add it to layers -3. `way_function()`, a function to process an OSM way and add it to layers -4. (optional) `init_function(name)`, a function to initialize Lua logic -5. 
(optional) `exit_function`, a function to finalize Lua logic (useful to show statistics) -6. (optional) `relation_scan_function`, a function to determine whether your Lua file wishes to process the given relation -7. (optional) `relation_function`, a function to process an OSM relation and add it to layers -8. (optional) `attribute_function`, a function to remap attributes from shapefiles +1. (optional) `node_keys`, a list of those OSM tags which indicate that a node should be processed +2. (optional) `way_keys`, a list of those OSM tags which indicate that a way should be processed +3. `node_function()`, a function to process an OSM node and add it to layers +4. `way_function()`, a function to process an OSM way and add it to layers +5. (optional) `init_function(name)`, a function to initialize Lua logic +6. (optional) `exit_function`, a function to finalize Lua logic (useful to show statistics) +7. (optional) `relation_scan_function`, a function to determine whether your Lua file wishes to process the given relation +8. (optional) `relation_function`, a function to process an OSM relation and add it to layers +9. (optional) `attribute_function`, a function to remap attributes from shapefiles -`node_keys` is a simple list (or in Lua parlance, a 'table') of OSM tag keys. If a node has one of those keys, it will be processed by `node_function`; if not, it'll be skipped. For example, if you wanted to show highway crossings and railway stations, it should be `{ "highway", "railway" }`. (This avoids the need to process the vast majority of nodes which contain no important tags at all.) +`node_keys` is a simple list (or in Lua parlance, a 'table') of OSM tags. If a node has one of those tags, it will be processed by `node_function`; if not, it'll be skipped. For example, if you wanted to show highway crossings and railway stations, it should be `{ "highway", "railway" }`. 
(This avoids the need to process the vast majority of nodes which contain no important tags at all.) + +`way_keys` is similar to `node_keys`, but for ways. For ways, you may also wish to express the filter in terms of the tag value, or as an inversion. For example, to exclude buildings: `way_keys = {"~building"}`. To build a map only of major roads: `way_keys = {"highway=motorway", "highway=trunk", "highway=primary", "highway=secondary"}` `node_function` and `way_function` work the same way. They are called with an OSM object; you then inspect the tags of that object, and put it in your vector tiles' layers based on those tags. In essence, the process is: diff --git a/include/osm_lua_processing.h b/include/osm_lua_processing.h index e8f3593..52ab2e5 100644 --- a/include/osm_lua_processing.h +++ b/include/osm_lua_processing.h @@ -19,6 +19,7 @@ #include class TagMap; +class SignificantTags; // Lua extern "C" { @@ -73,6 +74,7 @@ public: ~OsmLuaProcessing(); // ---- Helpers provided for main routine + void handleUserSignal(int signum); // Has this object been assigned to any layers? 
bool empty(); @@ -94,7 +96,7 @@ public: bool scanRelation(WayID id, const TagMap& tags); /// \brief We are now processing a significant node - void setNode(NodeID id, LatpLon node, const TagMap& tags); + bool setNode(NodeID id, LatpLon node, const TagMap& tags); /// \brief We are now processing a way bool setWay(WayID wayId, LatpLonVec const &llVec, const TagMap& tags); @@ -211,7 +213,8 @@ public: void setVectorLayerMetadata(const uint_least8_t layer, const std::string &key, const uint type); - std::vector GetSignificantNodeKeys(); + SignificantTags GetSignificantNodeKeys(); + SignificantTags GetSignificantWayKeys(); // ---- Cached geometries creation diff --git a/include/osm_store.h b/include/osm_store.h index 464aadd..0c20463 100644 --- a/include/osm_store.h +++ b/include/osm_store.h @@ -19,6 +19,21 @@ extern bool verbose; class NodeStore; class WayStore; +class UsedObjects { +public: + enum class Status: bool { Disabled = false, Enabled = true }; + UsedObjects(Status status); + bool test(NodeID id); + void set(NodeID id); + void enable(); + void clear(); + +private: + Status status; + std::vector mutex; + std::vector> ids; +}; + // A comparator for data_view so it can be used in boost's flat_map struct DataViewLessThan { bool operator()(const protozero::data_view& a, const protozero::data_view& b) const { @@ -206,8 +221,18 @@ protected: UsedWays used_ways; public: + UsedObjects usedNodes; + UsedObjects usedRelations; - OSMStore(NodeStore& nodes, WayStore& ways): nodes(nodes), ways(ways) + OSMStore(NodeStore& nodes, WayStore& ways): + nodes(nodes), + ways(ways), + // We only track usedNodes if way_keys is present; a node is used if it's + // a member of a way used by a used relation, or a way that meets the way_keys + // criteria. 
+ usedNodes(UsedObjects::Status::Disabled), + // A relation is used only if it was previously accepted from relation_scan_function + usedRelations(UsedObjects::Status::Enabled) { reopen(); } diff --git a/include/pbf_processor.h b/include/pbf_processor.h index 74e0ce3..daaf39d 100644 --- a/include/pbf_processor.h +++ b/include/pbf_processor.h @@ -8,6 +8,7 @@ #include #include #include "osm_store.h" +#include "significant_tags.h" #include "pbf_reader.h" #include "tag_map.h" #include @@ -44,7 +45,7 @@ struct IndexedBlockMetadata: BlockMetadata { class PbfProcessor { public: - enum class ReadPhase { Nodes = 1, Ways = 2, Relations = 4, RelationScan = 8 }; + enum class ReadPhase { Nodes = 1, Ways = 2, Relations = 4, RelationScan = 8, WayScan = 16 }; PbfProcessor(OSMStore &osmStore); @@ -54,7 +55,8 @@ public: int ReadPbfFile( uint shards, bool hasSortTypeThenID, - const std::unordered_set& nodeKeys, + const SignificantTags& nodeKeys, + const SignificantTags& wayKeys, unsigned int threadNum, const pbfreader_generate_stream& generate_stream, const pbfreader_generate_output& generate_output, @@ -77,28 +79,32 @@ private: std::istream &infile, OsmLuaProcessing &output, const BlockMetadata& blockMetadata, - const std::unordered_set& nodeKeys, + const SignificantTags& nodeKeys, + const SignificantTags& wayKeys, bool locationsOnWays, ReadPhase phase, uint shard, uint effectiveShard ); - bool ReadNodes(OsmLuaProcessing& output, PbfReader::PrimitiveGroup& pg, const PbfReader::PrimitiveBlock& pb, const std::unordered_set& nodeKeyPositions); + bool ReadNodes(OsmLuaProcessing& output, PbfReader::PrimitiveGroup& pg, const PbfReader::PrimitiveBlock& pb, const SignificantTags& nodeKeys); bool ReadWays( OsmLuaProcessing& output, PbfReader::PrimitiveGroup& pg, const PbfReader::PrimitiveBlock& pb, + const SignificantTags& wayKeys, bool locationsOnWays, uint shard, uint effectiveShards ); - bool ScanRelations(OsmLuaProcessing& output, PbfReader::PrimitiveGroup& pg, const 
PbfReader::PrimitiveBlock& pb); + bool ScanWays(OsmLuaProcessing& output, PbfReader::PrimitiveGroup& pg, const PbfReader::PrimitiveBlock& pb, const SignificantTags& wayKeys); + bool ScanRelations(OsmLuaProcessing& output, PbfReader::PrimitiveGroup& pg, const PbfReader::PrimitiveBlock& pb, const SignificantTags& wayKeys); bool ReadRelations( OsmLuaProcessing& output, PbfReader::PrimitiveGroup& pg, const PbfReader::PrimitiveBlock& pb, const BlockMetadata& blockMetadata, + const SignificantTags& wayKeys, uint shard, uint effectiveShards ); diff --git a/include/significant_tags.h b/include/significant_tags.h new file mode 100644 index 0000000..770f31d --- /dev/null +++ b/include/significant_tags.h @@ -0,0 +1,39 @@ +#ifndef SIGNIFICANT_TAGS_H +#define SIGNIFICANT_TAGS_H + +#include +#include + +class TagMap; +// Data structures to permit users to express filters on which nodes/ways +// to be accepted. +// +// Filters are of the shape: [~]key-name[=value-name] +// +// When a tilde is present, the filter's meaning is inverted. + +struct TagFilter { + bool accept; + std::string key; + std::string value; + + bool operator==(const TagFilter& other) const { + return accept == other.accept && key == other.key && value == other.value; + } +}; + +class SignificantTags { +public: + SignificantTags(); + SignificantTags(std::vector rawTags); + bool filter(const TagMap& tags) const; + + static TagFilter parseFilter(std::string rawTag); + bool enabled() const; + +private: + bool enabled_; + std::vector filters; +}; + +#endif diff --git a/include/tag_map.h b/include/tag_map.h index f951d9e..1d29ef2 100644 --- a/include/tag_map.h +++ b/include/tag_map.h @@ -23,11 +23,17 @@ // This is true since the strings are owned by the protobuf block reader // 3. 
Max number of tag values will fit in a short // OSM limit is 5,000 tags per object +struct Tag { + protozero::data_view key; + protozero::data_view value; +}; + class TagMap { public: TagMap(); void reset(); + bool empty(); void addTag(const protozero::data_view& key, const protozero::data_view& value); // Return -1 if key not found, else return its keyLoc. @@ -41,6 +47,19 @@ public: boost::container::flat_map exportToBoostMap() const; + struct Iterator { + const TagMap& map; + size_t shard = 0; + size_t offset = 0; + + bool operator!=(const Iterator& other) const; + void operator++(); + Tag operator*() const; + }; + + Iterator begin() const; + Iterator end() const; + private: uint32_t ensureString( std::vector>& vector, diff --git a/src/osm_lua_processing.cpp b/src/osm_lua_processing.cpp index 14c17ee..4e0c547 100644 --- a/src/osm_lua_processing.cpp +++ b/src/osm_lua_processing.cpp @@ -5,9 +5,11 @@ #include "helpers.h" #include "coordinates_geom.h" #include "osm_mem_tiles.h" +#include "significant_tags.h" #include "tag_map.h" #include "node_store.h" #include "polylabel.h" +#include using namespace std; @@ -15,6 +17,26 @@ const std::string EMPTY_STRING = ""; thread_local kaguya::State *g_luaState = nullptr; thread_local OsmLuaProcessing* osmLuaProcessing = nullptr; +void handleOsmLuaProcessingUserSignal(int signum) { + osmLuaProcessing->handleUserSignal(signum); +} + +class Sigusr1Handler { +public: + Sigusr1Handler() { +#ifndef _WIN32 + signal(SIGUSR1, handleOsmLuaProcessingUserSignal); +#endif + } + + void initialize() { + // No-op just to ensure the compiler doesn't optimize away + // the handler. + } +}; + +thread_local Sigusr1Handler sigusr1Handler; + // A key in `currentTags`. If Lua code refers to an absent key, // found will be false. 
struct KnownTagKey { @@ -158,6 +180,8 @@ OsmLuaProcessing::OsmLuaProcessing( layers(layers), materializeGeometries(materializeGeometries) { + sigusr1Handler.initialize(); + // ---- Initialise Lua g_luaState = &luaState; luaState.setErrorHandler(lua_error_handler); @@ -213,6 +237,10 @@ OsmLuaProcessing::~OsmLuaProcessing() { luaState("if exit_function~=nil then exit_function() end"); } +void OsmLuaProcessing::handleUserSignal(int signum) { + std::cout << "processing OSM ID " << originalOsmID << std::endl; +} + // ---- Helpers provided for main routine // Has this object been assigned to any layers? @@ -871,8 +899,7 @@ bool OsmLuaProcessing::scanRelation(WayID id, const TagMap& tags) { return true; } -void OsmLuaProcessing::setNode(NodeID id, LatpLon node, const TagMap& tags) { - +bool OsmLuaProcessing::setNode(NodeID id, LatpLon node, const TagMap& tags) { reset(); originalOsmID = id; isWay = false; @@ -899,7 +926,11 @@ void OsmLuaProcessing::setNode(NodeID id, LatpLon node, const TagMap& tags) { for (auto &output : finalizeOutputs()) { osmMemTiles.addObjectToSmallIndex(index, output, originalOsmID); } - } + + return true; + } + + return false; } // We are now processing a way @@ -995,10 +1026,25 @@ void OsmLuaProcessing::setRelation( } } -vector OsmLuaProcessing::GetSignificantNodeKeys() { - return luaState["node_keys"]; +SignificantTags OsmLuaProcessing::GetSignificantNodeKeys() { + if (!!luaState["node_keys"]) { + std::vector keys = luaState["node_keys"]; + return SignificantTags(keys); + } + + return SignificantTags(); } +SignificantTags OsmLuaProcessing::GetSignificantWayKeys() { + if (!!luaState["way_keys"]) { + std::vector keys = luaState["way_keys"]; + return SignificantTags(keys); + } + + return SignificantTags(); +} + + std::vector OsmLuaProcessing::finalizeOutputs() { std::vector list; list.reserve(this->outputs.size()); diff --git a/src/osm_store.cpp b/src/osm_store.cpp index 9844991..aaa7ceb 100644 --- a/src/osm_store.cpp +++ b/src/osm_store.cpp @@ 
-17,6 +17,40 @@ static inline bool isClosed(const std::vector& way) { return way.begin() == way.end(); } +UsedObjects::UsedObjects(Status status): status(status), mutex(256), ids(256 * 1024) { +} + +bool UsedObjects::test(NodeID id) { + if (status == Status::Disabled) + return true; + + const size_t chunk = id / 65536; + if (ids[chunk].size() == 0) + return false; + + return ids[chunk][id % 65536]; +} + +void UsedObjects::enable() { + status = Status::Enabled; +} + +void UsedObjects::set(NodeID id) { + const size_t chunk = id / 65536; + + std::lock_guard lock(mutex[chunk % mutex.size()]); + if (ids[chunk].size() == 0) + ids[chunk].resize(65536); + + ids[chunk][id % 65536] = true; +} + +void UsedObjects::clear() { + // This data is not needed after PbfProcessor's ReadPhase::Nodes has completed, + // and it takes up to ~1.5GB of RAM. + ids.clear(); +} + void OSMStore::open(std::string const &osm_store_filename) { void_mmap_allocator::openMmapFile(osm_store_filename); diff --git a/src/pbf_processor.cpp b/src/pbf_processor.cpp index fea18ef..0e2a1fd 100644 --- a/src/pbf_processor.cpp +++ b/src/pbf_processor.cpp @@ -24,7 +24,7 @@ PbfProcessor::PbfProcessor(OSMStore &osmStore) : osmStore(osmStore) { } -bool PbfProcessor::ReadNodes(OsmLuaProcessing& output, PbfReader::PrimitiveGroup& pg, const PbfReader::PrimitiveBlock& pb, const unordered_set& nodeKeyPositions) +bool PbfProcessor::ReadNodes(OsmLuaProcessing& output, PbfReader::PrimitiveGroup& pg, const PbfReader::PrimitiveBlock& pb, const SignificantTags& nodeKeys) { // ---- Read nodes std::vector nodes; @@ -35,30 +35,24 @@ bool PbfProcessor::ReadNodes(OsmLuaProcessing& output, PbfReader::PrimitiveGroup NodeID nodeId = node.id; LatpLon latplon = { int(lat2latp(double(node.lat)/10000000.0)*10000000.0), node.lon }; - bool significant = false; - for (int i = node.tagStart; i < node.tagEnd; i += 2) { - auto keyIndex = pg.translateNodeKeyValue(i); + tags.reset(); + // For tagged nodes, call Lua, then save the OutputObject + 
for (int n = node.tagStart; n < node.tagEnd; n += 2) { + auto keyIndex = pg.translateNodeKeyValue(n); + auto valueIndex = pg.translateNodeKeyValue(n + 1); - if (nodeKeyPositions.find(keyIndex) != nodeKeyPositions.end()) { - significant = true; - } + const protozero::data_view& key = pb.stringTable[keyIndex]; + const protozero::data_view& value = pb.stringTable[valueIndex]; + tags.addTag(key, value); } - nodes.push_back(std::make_pair(static_cast(nodeId), latplon)); + bool emitted = false; + if (!tags.empty() && nodeKeys.filter(tags)) { + emitted = output.setNode(static_cast(nodeId), latplon, tags); + } - if (significant) { - tags.reset(); - // For tagged nodes, call Lua, then save the OutputObject - for (int n = node.tagStart; n < node.tagEnd; n += 2) { - auto keyIndex = pg.translateNodeKeyValue(n); - auto valueIndex = pg.translateNodeKeyValue(n + 1); - - const protozero::data_view& key = pb.stringTable[keyIndex]; - const protozero::data_view& value = pb.stringTable[valueIndex]; - tags.addTag(key, value); - } - output.setNode(static_cast(nodeId), latplon, tags); - } + if (emitted || osmStore.usedNodes.test(nodeId)) + nodes.push_back(std::make_pair(static_cast(nodeId), latplon)); } if (nodes.size() > 0) { @@ -72,6 +66,7 @@ bool PbfProcessor::ReadWays( OsmLuaProcessing &output, PbfReader::PrimitiveGroup& pg, const PbfReader::PrimitiveBlock& pb, + const SignificantTags& wayKeys, bool locationsOnWays, uint shard, uint effectiveShards @@ -89,6 +84,12 @@ bool PbfProcessor::ReadWays( std::vector nodeVec; for (PbfReader::Way pbfWay : pg.ways()) { + tags.reset(); + readTags(pbfWay, pb, tags); + + if (!osmStore.way_is_used(pbfWay.id) && !wayKeys.filter(tags)) + continue; + llVec.clear(); nodeVec.clear(); @@ -132,8 +133,6 @@ bool PbfProcessor::ReadWays( if (llVec.empty()) continue; try { - tags.reset(); - readTags(pbfWay, pb, tags); bool emitted = output.setWay(static_cast(pbfWay.id), llVec, tags); // If we need it for later, store the way's coordinates in the global way 
store @@ -160,7 +159,32 @@ bool PbfProcessor::ReadWays( return true; } -bool PbfProcessor::ScanRelations(OsmLuaProcessing& output, PbfReader::PrimitiveGroup& pg, const PbfReader::PrimitiveBlock& pb) { +bool PbfProcessor::ScanWays(OsmLuaProcessing& output, PbfReader::PrimitiveGroup& pg, const PbfReader::PrimitiveBlock& pb, const SignificantTags& wayKeys) { + // Scan ways to see which nodes we need to save. + // + // This phase only runs if the Lua script has declared a `way_keys` variable. + if (pg.ways().empty()) + return false; + + TagMap tags; + + // Note: unlike ScanRelations, we don't call into Lua. Instead, we statically inspect + // the tags on each way to decide if it will be emitted. + for (auto& way : pg.ways()) { + tags.reset(); + readTags(way, pb, tags); + + if (osmStore.way_is_used(way.id) || wayKeys.filter(tags)) { + for (const auto id : way.refs) { + osmStore.usedNodes.set(id); + } + } + } + + return true; +} + +bool PbfProcessor::ScanRelations(OsmLuaProcessing& output, PbfReader::PrimitiveGroup& pg, const PbfReader::PrimitiveBlock& pb, const SignificantTags& wayKeys) { // Scan relations to see which ways we need to save if (pg.relations().empty()) return false; @@ -174,14 +198,20 @@ bool PbfProcessor::ScanRelations(OsmLuaProcessing& output, PbfReader::PrimitiveG bool isMultiPolygon = relationIsType(pbfRelation, typeKey, mpKey); bool isAccepted = false; WayID relid = static_cast(pbfRelation.id); + tags.reset(); + readTags(pbfRelation, pb, tags); + if (!isMultiPolygon) { if (output.canReadRelations()) { - tags.reset(); - readTags(pbfRelation, pb, tags); isAccepted = output.scanRelation(relid, tags); } + if (!isAccepted) continue; + } else { + if (!wayKeys.filter(tags)) + continue; } + osmStore.usedRelations.set(relid); for (int n=0; n < pbfRelation.memids.size(); n++) { uint64_t lastID = pbfRelation.memids[n]; @@ -210,6 +240,7 @@ bool PbfProcessor::ReadRelations( PbfReader::PrimitiveGroup& pg, const PbfReader::PrimitiveBlock& pb, const BlockMetadata& 
blockMetadata, + const SignificantTags& wayKeys, uint shard, uint effectiveShards ) { @@ -263,7 +294,8 @@ bool PbfProcessor::ReadRelations( try { tags.reset(); readTags(pbfRelation, pb, tags); - output.setRelation(pb.stringTable, pbfRelation, outerWayVec, innerWayVec, tags, isMultiPolygon, isInnerOuter); + if (osmStore.usedRelations.test(pbfRelation.id) || wayKeys.filter(tags)) + output.setRelation(pb.stringTable, pbfRelation, outerWayVec, innerWayVec, tags, isMultiPolygon, isInnerOuter); } catch (std::out_of_range &err) { // Relation is missing a member? @@ -281,7 +313,8 @@ bool PbfProcessor::ReadBlock( std::istream& infile, OsmLuaProcessing& output, const BlockMetadata& blockMetadata, - const unordered_set& nodeKeys, + const SignificantTags& nodeKeys, + const SignificantTags& wayKeys, bool locationsOnWays, ReadPhase phase, uint shard, @@ -299,14 +332,6 @@ bool PbfProcessor::ReadBlock( // Keep count of groups read during this phase. std::size_t read_groups = 0; - // Read the string table, and pre-calculate the positions of valid node keys - unordered_set nodeKeyPositions; - for (auto it : nodeKeys) { - //nodeKeyPositions.insert(findStringPosition(pb, it)); - auto rv = findStringPosition(pb, it); - nodeKeyPositions.insert(rv); - } - int primitiveGroupSize = 0; for (auto& pg : pb.groups()) { primitiveGroupSize++; @@ -329,7 +354,7 @@ bool PbfProcessor::ReadBlock( }; if(phase == ReadPhase::Nodes) { - bool done = ReadNodes(output, pg, pb, nodeKeyPositions); + bool done = ReadNodes(output, pg, pb, nodeKeys); if(done) { output_progress(); ++read_groups; @@ -337,9 +362,21 @@ bool PbfProcessor::ReadBlock( } } + if(phase == ReadPhase::WayScan) { + bool done = ScanWays(output, pg, pb, wayKeys); + if(done) { + if (ioMutex.try_lock()) { + std::cout << "\r(Scanning for nodes used in ways: " << (100*blocksProcessed.load()/blocksToProcess.load()) << "%) "; + std::cout.flush(); + ioMutex.unlock(); + } + continue; + } + } + if(phase == ReadPhase::RelationScan) { 
osmStore.ensureUsedWaysInited(); - bool done = ScanRelations(output, pg, pb); + bool done = ScanRelations(output, pg, pb, wayKeys); if(done) { if (ioMutex.try_lock()) { std::cout << "\r(Scanning for ways used in relations: " << (100*blocksProcessed.load()/blocksToProcess.load()) << "%) "; @@ -351,7 +388,7 @@ bool PbfProcessor::ReadBlock( } if(phase == ReadPhase::Ways) { - bool done = ReadWays(output, pg, pb, locationsOnWays, shard, effectiveShards); + bool done = ReadWays(output, pg, pb, wayKeys, locationsOnWays, shard, effectiveShards); if(done) { output_progress(); ++read_groups; @@ -360,7 +397,7 @@ bool PbfProcessor::ReadBlock( } if(phase == ReadPhase::Relations) { - bool done = ReadRelations(output, pg, pb, blockMetadata, shard, effectiveShards); + bool done = ReadRelations(output, pg, pb, blockMetadata, wayKeys, shard, effectiveShards); if(done) { output_progress(); ++read_groups; @@ -412,7 +449,8 @@ bool blockHasPrimitiveGroupSatisfying( int PbfProcessor::ReadPbfFile( uint shards, bool hasSortTypeThenID, - unordered_set const& nodeKeys, + const SignificantTags& nodeKeys, + const SignificantTags& wayKeys, unsigned int threadNum, const pbfreader_generate_stream& generate_stream, const pbfreader_generate_output& generate_output, @@ -511,7 +549,16 @@ int PbfProcessor::ReadPbfFile( } - std::vector all_phases = { ReadPhase::RelationScan, ReadPhase::Nodes, ReadPhase::Ways, ReadPhase::Relations }; + std::vector all_phases = { ReadPhase::RelationScan }; + if (wayKeys.enabled()) { + osmStore.usedNodes.enable(); + all_phases.push_back(ReadPhase::WayScan); + } + + all_phases.push_back(ReadPhase::Nodes); + all_phases.push_back(ReadPhase::Ways); + all_phases.push_back(ReadPhase::Relations); + for(auto phase: all_phases) { uint effectiveShards = 1; @@ -561,6 +608,7 @@ int PbfProcessor::ReadPbfFile( for (const auto& entry : blocks) { if ((phase == ReadPhase::Nodes && entry.second.hasNodes) || (phase == ReadPhase::RelationScan && entry.second.hasRelations) || + (phase == 
ReadPhase::WayScan && entry.second.hasWays) || (phase == ReadPhase::Ways && entry.second.hasWays) || (phase == ReadPhase::Relations && entry.second.hasRelations)) filteredBlocks[entry.first] = entry.second; @@ -597,7 +645,7 @@ int PbfProcessor::ReadPbfFile( { for(const std::vector& blockRange: blockRanges) { - boost::asio::post(pool, [=, &blockRange, &blocks, &block_mutex, &nodeKeys]() { + boost::asio::post(pool, [=, &blockRange, &blocks, &block_mutex, &nodeKeys, &wayKeys]() { if (phase == ReadPhase::Nodes) osmStore.nodes.batchStart(); if (phase == ReadPhase::Ways) @@ -607,7 +655,7 @@ int PbfProcessor::ReadPbfFile( auto infile = generate_stream(); auto output = generate_output(); - if(ReadBlock(*infile, *output, indexedBlockMetadata, nodeKeys, locationsOnWays, phase, shard, effectiveShards)) { + if(ReadBlock(*infile, *output, indexedBlockMetadata, nodeKeys, wayKeys, locationsOnWays, phase, shard, effectiveShards)) { const std::lock_guard lock(block_mutex); blocks.erase(indexedBlockMetadata.index); } @@ -628,6 +676,7 @@ int PbfProcessor::ReadPbfFile( if(phase == ReadPhase::Nodes) { osmStore.nodes.finalize(threadNum); + osmStore.usedNodes.clear(); } if(phase == ReadPhase::Ways) { osmStore.ways.finalize(threadNum); diff --git a/src/significant_tags.cpp b/src/significant_tags.cpp new file mode 100644 index 0000000..7a3663f --- /dev/null +++ b/src/significant_tags.cpp @@ -0,0 +1,88 @@ +#include +#include "significant_tags.h" +#include "tag_map.h" + +TagFilter SignificantTags::parseFilter(std::string rawTag) { + TagFilter rv { true }; + + std::string input = rawTag; + + if (input.size() > 0 && input[0] == '~') { + rv.accept = false; + input = input.substr(1); + } + + size_t n = input.find("="); + + if (n == std::string::npos) { + rv.key = input; + return rv; + } + + rv.key = input.substr(0, n); + rv.value = input.substr(n + 1); + + return rv; +} + +SignificantTags::SignificantTags(): enabled_(false) {} + +SignificantTags::SignificantTags(std::vector rawTags): 
enabled_(true) { + for (const std::string& rawTag : rawTags) { + filters.push_back(parseFilter(rawTag)); + } + + if (filters.empty()) + return; + + bool accept = filters[0].accept; + + size_t i = 0; + for (const auto& filter : filters) { + if (filter.accept != accept) { + throw std::runtime_error("cannot mix reject and accept filters: " + rawTags[0] + ", " + rawTags[i]); + } + i++; + } +} + +bool SignificantTags::enabled() const { return enabled_; } + +bool SignificantTags::filter(const TagMap& tags) const { + if (!enabled_) + return true; + + if (filters.empty()) + return false; + + bool defaultReject = filters[0].accept; + + if (defaultReject) { + // There must be at least one tag matched by the filters. + for (const Tag& tag : tags) { + for (const TagFilter& filter : filters) { + if (filter.key == tag.key && (filter.value.empty() || filter.value == tag.value)) + return true; + } + } + + return false; + } + + // There must be at least one tag not matched by any filters. + for (const Tag& tag : tags) { + // If no filters match this tag, the object is accepted. + bool hadMatch = false; + for (const TagFilter& filter : filters) { + if (filter.key == tag.key && (filter.value.empty() || filter.value == tag.value)) { + hadMatch = true; + break; + } + } + + if (!hadMatch) + return true; + } + + return false; +} diff --git a/src/tag_map.cpp b/src/tag_map.cpp index 8fc02a9..b93b502 100644 --- a/src/tag_map.cpp +++ b/src/tag_map.cpp @@ -16,6 +16,13 @@ void TagMap::reset() { } } +bool TagMap::empty() { + for (int i = 0; i < keys.size(); i++) + if (keys[i].size() > 0) + return false; + + return true; +} const std::size_t hashString(const std::string& str) { // This is a pretty crappy hash function in terms of bit // avalanching and distribution of output values. 
@@ -60,16 +67,12 @@ uint32_t TagMap::ensureString( void TagMap::addTag(const protozero::data_view& key, const protozero::data_view& value) { uint32_t valueLoc = ensureString(values, value); -// std::cout << "valueLoc = " << valueLoc << std::endl; uint32_t keyLoc = ensureString(keys, key); -// std::cout << "keyLoc = " << keyLoc << std::endl; const uint16_t shard = keyLoc >> 16; const uint16_t pos = keyLoc; -// std::cout << "shard=" << shard << ", pos=" << pos << std::endl; if (key2value[shard].size() <= pos) { -// std::cout << "growing shard" << std::endl; key2value[shard].resize(pos + 1); } @@ -133,3 +136,38 @@ boost::container::flat_map TagMap::exportToBoostMap() return rv; } + +TagMap::Iterator TagMap::begin() const { + size_t shard = 0; + while(keys.size() > shard && keys[shard].size() == 0) + shard++; + + return Iterator{*this, shard, 0}; +} + +TagMap::Iterator TagMap::end() const { + return Iterator{*this, keys.size(), 0}; +} + +bool TagMap::Iterator::operator!=(const Iterator& other) const { + return other.shard != shard || other.offset != offset; +} + +void TagMap::Iterator::operator++() { + ++offset; + if (offset >= map.keys[shard].size()) { + offset = 0; + shard++; + // Advance to the next non-empty shard. 
+ while(map.keys.size() > shard && map.keys[shard].size() == 0) + shard++; + } +} + +Tag TagMap::Iterator::operator*() const { + const uint32_t valueLoc = map.key2value[shard][offset]; + return Tag{ + *map.keys[shard][offset], + *map.getValue(valueLoc) + }; +} diff --git a/src/tilemaker.cpp b/src/tilemaker.cpp index 5ad7dda..b18b726 100644 --- a/src/tilemaker.cpp +++ b/src/tilemaker.cpp @@ -42,6 +42,7 @@ #include "helpers.h" #include "coordinates.h" #include "coordinates_geom.h" +#include "significant_tags.h" #include "attribute_store.h" #include "output_object.h" @@ -255,10 +256,9 @@ int main(const int argc, const char* argv[]) { } shpMemTiles.reportSize(); - // ---- Read significant node tags - - vector nodeKeyVec = osmLuaProcessing.GetSignificantNodeKeys(); - unordered_set nodeKeys(nodeKeyVec.begin(), nodeKeyVec.end()); + // ---- Read significant node/way tags + const SignificantTags significantNodeTags = osmLuaProcessing.GetSignificantNodeKeys(); + const SignificantTags significantWayTags = osmLuaProcessing.GetSignificantWayKeys(); // ---- Read all PBFs @@ -274,7 +274,8 @@ int main(const int argc, const char* argv[]) { int ret = pbfProcessor.ReadPbfFile( nodeStore->shards(), hasSortTypeThenID, - nodeKeys, + significantNodeTags, + significantWayTags, options.threadNum, [&]() { thread_local std::shared_ptr pbfStream(new ifstream(inputFile, ios::in | ios::binary)); diff --git a/test/significant_tags.test.cpp b/test/significant_tags.test.cpp new file mode 100644 index 0000000..33e2694 --- /dev/null +++ b/test/significant_tags.test.cpp @@ -0,0 +1,119 @@ +#include +#include "external/minunit.h" +#include "significant_tags.h" +#include "tag_map.h" + +MU_TEST(test_parse_filter) { + { + TagFilter expected{true, "foo", ""}; + mu_check(SignificantTags::parseFilter("foo") == expected); + } + + { + TagFilter expected{false, "foo", ""}; + mu_check(SignificantTags::parseFilter("~foo") == expected); + } + + { + TagFilter expected{true, "foo", "bar"}; + 
mu_check(SignificantTags::parseFilter("foo=bar") == expected); + } + + { + TagFilter expected{false, "foo", "bar"}; + mu_check(SignificantTags::parseFilter("~foo=bar") == expected); + } + +} + +MU_TEST(test_invalid_significant_tags) { + bool threw = false; + try { + // Filters must be all accept, or all reject, not a mix. + SignificantTags tags({"a", "~b"}); + } catch (...) { + threw = true; + } + + mu_check(threw); +} + +MU_TEST(test_significant_tags) { + const std::string building = "building"; + const std::string yes = "yes"; + const std::string name = "name"; + const std::string nameValue = "Some name"; + const std::string power = "power"; + const std::string tower = "tower"; + + // If created with no list, it's not enabled and all things pass filter. + // This is the case when people omit `node_keys` or `way_keys`. + { + SignificantTags tags; + TagMap map; + mu_check(tags.filter(map)); + } + + // If created with empty list, it rejects all things. + // This is the case when people write `way_keys = {}`, e.g. when creating + // an extract that only parses nodes. + { + std::vector empty; + SignificantTags tags(empty); + TagMap map; + mu_check(!tags.filter(map)); + } + + // If created in default-accept mode, it accepts anything with an unmatched tag. + // This is the case when people write `way_keys = {"~building"}` + { + std::vector defaultAccept{"~building"}; + SignificantTags tags(defaultAccept); + + { + TagMap map; + map.addTag(building, yes); + mu_check(!tags.filter(map)); + } + + { + TagMap map; + map.addTag(building, yes); + map.addTag(name, nameValue); + mu_check(tags.filter(map)); + } + + } + + // If created in default-reject mode, it accepts anything with a matched tag. 
+ // This is the case when people write `way_keys = {"power=tower"}` + { + std::vector defaultReject{"power=tower"}; + SignificantTags tags(defaultReject); + + { + TagMap map; + mu_check(!tags.filter(map)); + } + + { + TagMap map; + map.addTag(power, tower); + mu_check(tags.filter(map)); + } + + } +} + +MU_TEST_SUITE(test_suite_significant_tags) { + MU_RUN_TEST(test_parse_filter); + MU_RUN_TEST(test_significant_tags); + MU_RUN_TEST(test_invalid_significant_tags); +} + +int main() { + MU_RUN_SUITE(test_suite_significant_tags); + MU_REPORT(); + return MU_EXIT_CODE; +} +