Skip to content

Commit

Permalink
Options for untagged entities (#103)
Browse files Browse the repository at this point in the history
* add booleans that control whether untagged nodes, ways, relations, and areas are written; they default to true

* add flags --no-untagged-nodes, --no-untagged-ways, --no-untagged-relations, and --no-untagged-areas
  • Loading branch information
patrickbr authored Nov 28, 2024
1 parent 3769c8e commit d7db303
Show file tree
Hide file tree
Showing 6 changed files with 128 additions and 30 deletions.
5 changes: 5 additions & 0 deletions include/osm2rdf/config/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ struct Config {
bool addWayNodeSpatialMetadata = false;
bool skipWikiLinks = false;

// Per-entity-type switches for entities that carry no tags. All default to
// true; fromArgs() clears a switch when the matching --no-untagged-nodes,
// --no-untagged-ways, --no-untagged-relations or --no-untagged-areas flag
// is set on the command line.
bool addUntaggedNodes = true;
bool addUntaggedWays = true;
bool addUntaggedRelations = true;
bool addUntaggedAreas = true;

int numThreads = std::thread::hardware_concurrency();

// Default settings for data
Expand Down
32 changes: 32 additions & 0 deletions include/osm2rdf/config/Constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,38 @@ const static inline std::string NO_WAY_GEOM_RELATIONS_OPTION_LONG =
const static inline std::string NO_WAY_GEOM_RELATIONS_OPTION_HELP =
"Do not dump way geometric relations";

// Strings for the four "--no-untagged-*" switches. For each entity type:
//   *_INFO         line printed by Config::getInfo() when the switch is active
//   *_OPTION_SHORT short option name handed to the argument parser; left empty
//                  here (presumably meaning "long option only" -- confirm
//                  against the parser's handling of empty short names)
//   *_OPTION_LONG  long option name, used as --<name> on the command line
//   *_OPTION_HELP  help text handed to the argument parser

// Untagged nodes.
const static inline std::string NO_UNTAGGED_NODES_INFO =
"Do not output untagged nodes";
const static inline std::string NO_UNTAGGED_NODES_OPTION_SHORT = "";
const static inline std::string NO_UNTAGGED_NODES_OPTION_LONG =
"no-untagged-nodes";
const static inline std::string NO_UNTAGGED_NODES_OPTION_HELP =
"Do not output untagged nodes";

// Untagged ways.
const static inline std::string NO_UNTAGGED_WAYS_INFO =
"Do not output untagged ways";
const static inline std::string NO_UNTAGGED_WAYS_OPTION_SHORT = "";
const static inline std::string NO_UNTAGGED_WAYS_OPTION_LONG =
"no-untagged-ways";
const static inline std::string NO_UNTAGGED_WAYS_OPTION_HELP =
"Do not output untagged ways";

// Untagged relations.
const static inline std::string NO_UNTAGGED_RELATIONS_INFO =
"Do not output untagged relations";
const static inline std::string NO_UNTAGGED_RELATIONS_OPTION_SHORT = "";
const static inline std::string NO_UNTAGGED_RELATIONS_OPTION_LONG =
"no-untagged-relations";
const static inline std::string NO_UNTAGGED_RELATIONS_OPTION_HELP =
"Do not output untagged relations";

// Untagged areas.
const static inline std::string NO_UNTAGGED_AREAS_INFO =
"Do not output untagged areas";
const static inline std::string NO_UNTAGGED_AREAS_OPTION_SHORT = "";
const static inline std::string NO_UNTAGGED_AREAS_OPTION_LONG =
"no-untagged-areas";
const static inline std::string NO_UNTAGGED_AREAS_OPTION_HELP =
"Do not output untagged areas";

const static inline std::string ADD_AREA_WAY_LINESTRINGS_INFO =
"Adding linestrings for ways which form areas";
const static inline std::string ADD_AREA_WAY_LINESTRINGS_OPTION_SHORT = "";
Expand Down
3 changes: 3 additions & 0 deletions include/osm2rdf/osm/CountHandler.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ namespace osm2rdf::osm {

class CountHandler : public osmium::handler::Handler {
public:
CountHandler(const osm2rdf::config::Config& config) : _config(config) {};
void node(const osmium::Node& node);
void relation(const osmium::Relation& relation);
void way(const osmium::Way& way);
Expand All @@ -45,6 +46,8 @@ class CountHandler : public osmium::handler::Handler {
bool _firstPassDone = false;
size_t _minId = std::numeric_limits<size_t>::max();
size_t _maxId = 0;

osm2rdf::config::Config _config;
};
}

Expand Down
69 changes: 57 additions & 12 deletions src/config/Config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@
// You should have received a copy of the GNU General Public License
// along with osm2rdf. If not, see <https://www.gnu.org/licenses/>.

#include "osm2rdf/config/Config.h"

#include <filesystem>
#include <iostream>
#include <string>

#include "osm2rdf/config/Config.h"

#if defined(_OPENMP)
#include "omp.h"
#endif
Expand Down Expand Up @@ -87,6 +87,22 @@ std::string osm2rdf::config::Config::getInfo(std::string_view prefix) const {
<< osm2rdf::config::constants::ADD_AREA_WAY_LINESTRINGS_INFO;
}
}
if (!addUntaggedNodes) {
oss << "\n"
<< prefix << osm2rdf::config::constants::NO_UNTAGGED_NODES_INFO;
}
if (!addUntaggedWays) {
oss << "\n"
<< prefix << osm2rdf::config::constants::NO_UNTAGGED_WAYS_INFO;
}
if (!addUntaggedRelations) {
oss << "\n"
<< prefix << osm2rdf::config::constants::NO_UNTAGGED_RELATIONS_INFO;
}
if (!addUntaggedAreas) {
oss << "\n"
<< prefix << osm2rdf::config::constants::NO_UNTAGGED_AREAS_INFO;
}
if (simplifyWKT > 0) {
oss << "\n" << prefix << osm2rdf::config::constants::SIMPLIFY_WKT_INFO;
oss << "\n"
Expand Down Expand Up @@ -145,13 +161,12 @@ std::string osm2rdf::config::Config::getInfo(std::string_view prefix) const {
}
}
oss << "\n" << prefix << osm2rdf::config::constants::SECTION_MISCELLANEOUS;
oss << "\n"
<< prefix << "Num Threads: " << numThreads;
oss << "\n" << prefix << "Num Threads: " << numThreads;

if (!storeLocations.empty()) {
oss << "\n"
<< prefix << osm2rdf::config::constants::STORE_LOCATIONS_INFO
<< " " << storeLocations;
<< prefix << osm2rdf::config::constants::STORE_LOCATIONS_INFO << " "
<< storeLocations;
}

if (writeRDFStatistics) {
Expand Down Expand Up @@ -265,6 +280,30 @@ void osm2rdf::config::Config::fromArgs(int argc, char** argv) {
osm2rdf::config::constants::ADD_AREA_WAY_LINESTRINGS_OPTION_LONG,
osm2rdf::config::constants::ADD_AREA_WAY_LINESTRINGS_OPTION_HELP);

auto noUntaggedNodesOp =
parser.add<popl::Switch, popl::Attribute::expert>(
osm2rdf::config::constants::NO_UNTAGGED_NODES_OPTION_SHORT,
osm2rdf::config::constants::NO_UNTAGGED_NODES_OPTION_LONG,
osm2rdf::config::constants::NO_UNTAGGED_NODES_OPTION_HELP);

auto noUntaggedWaysOp =
parser.add<popl::Switch, popl::Attribute::expert>(
osm2rdf::config::constants::NO_UNTAGGED_WAYS_OPTION_SHORT,
osm2rdf::config::constants::NO_UNTAGGED_WAYS_OPTION_LONG,
osm2rdf::config::constants::NO_UNTAGGED_WAYS_OPTION_HELP);

auto noUntaggedRelationsOp =
parser.add<popl::Switch, popl::Attribute::expert>(
osm2rdf::config::constants::NO_UNTAGGED_RELATIONS_OPTION_SHORT,
osm2rdf::config::constants::NO_UNTAGGED_RELATIONS_OPTION_LONG,
osm2rdf::config::constants::NO_UNTAGGED_RELATIONS_OPTION_HELP);

auto noUntaggedAreasOp =
parser.add<popl::Switch, popl::Attribute::expert>(
osm2rdf::config::constants::NO_UNTAGGED_AREAS_OPTION_SHORT,
osm2rdf::config::constants::NO_UNTAGGED_AREAS_OPTION_LONG,
osm2rdf::config::constants::NO_UNTAGGED_AREAS_OPTION_HELP);

auto addWayMetadataOp = parser.add<popl::Switch>(
osm2rdf::config::constants::ADD_WAY_METADATA_OPTION_SHORT,
osm2rdf::config::constants::ADD_WAY_METADATA_OPTION_LONG,
Expand All @@ -288,11 +327,10 @@ void osm2rdf::config::Config::fromArgs(int argc, char** argv) {
osm2rdf::config::constants::AUX_GEO_FILES_OPTION_LONG,
osm2rdf::config::constants::AUX_GEO_FILES_OPTION_HELP);

auto numThreadsOp =
parser.add<popl::Value<int>, popl::Attribute::advanced>(
osm2rdf::config::constants::NUM_THREADS_OPTION_SHORT,
osm2rdf::config::constants::NUM_THREADS_OPTION_LONG,
osm2rdf::config::constants::NUM_THREADS_OPTION_HELP, numThreads);
auto numThreadsOp = parser.add<popl::Value<int>, popl::Attribute::advanced>(
osm2rdf::config::constants::NUM_THREADS_OPTION_SHORT,
osm2rdf::config::constants::NUM_THREADS_OPTION_LONG,
osm2rdf::config::constants::NUM_THREADS_OPTION_HELP, numThreads);

auto semicolonTagKeysOp =
parser.add<popl::Value<std::string>, popl::Attribute::advanced>(
Expand Down Expand Up @@ -444,6 +482,11 @@ void osm2rdf::config::Config::fromArgs(int argc, char** argv) {
wktDeviation = wktDeviationOp->value();
wktPrecision = wktPrecisionOp->value();

addUntaggedNodes = !noUntaggedNodesOp->is_set();
addUntaggedWays = !noUntaggedWaysOp->is_set();
addUntaggedRelations = !noUntaggedRelationsOp->is_set();
addUntaggedAreas = !noUntaggedAreasOp->is_set();

addWayNodeOrder |= addWayNodeSpatialMetadata;

if (semicolonTagKeysOp->is_set()) {
Expand All @@ -464,7 +507,9 @@ void osm2rdf::config::Config::fromArgs(int argc, char** argv) {
// Output
output = outputOp->value();
outputFormat = outputFormatOp->value();
outputCompress = outputCompressOp->value() == "none" ? NONE : (outputCompressOp->value() == "gz" ? GZ : BZ2);
outputCompress = outputCompressOp->value() == "none"
? NONE
: (outputCompressOp->value() == "gz" ? GZ : BZ2);
outputKeepFiles = outputKeepFilesOp->is_set();
if (output.empty()) {
outputCompress = NONE;
Expand Down
28 changes: 11 additions & 17 deletions src/osm/CountHandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,52 +16,46 @@
// You should have received a copy of the GNU General Public License
// along with osm2rdf. If not, see <https://www.gnu.org/licenses/>.

#include "osm2rdf/osm/CountHandler.h"

#include <iostream>

#include "osm2rdf/osm/CountHandler.h"

// ____________________________________________________________________________
void osm2rdf::osm::CountHandler::prepare_for_lookup() {
_firstPassDone = true;
}
void osm2rdf::osm::CountHandler::prepare_for_lookup() { _firstPassDone = true; }

// ____________________________________________________________________________
void osm2rdf::osm::CountHandler::node(const osmium::Node& node){
void osm2rdf::osm::CountHandler::node(const osmium::Node& node) {
if (node.positive_id() < _minId) _minId = node.positive_id();
if (node.positive_id() > _maxId) _maxId = node.positive_id();
if (_firstPassDone) {
if (_firstPassDone || (!_config.addUntaggedNodes && node.tags().empty())) {
return;
}
_numNodes++;
}

// ____________________________________________________________________________
void osm2rdf::osm::CountHandler::relation(const osmium::Relation&) {
if (_firstPassDone) {
void osm2rdf::osm::CountHandler::relation(const osmium::Relation& rel) {
if (_firstPassDone || (!_config.addUntaggedRelations && rel.tags().empty())) {
return;
}
_numRelations++;
}

// ____________________________________________________________________________
void osm2rdf::osm::CountHandler::way(const osmium::Way&) {
if (_firstPassDone) {
void osm2rdf::osm::CountHandler::way(const osmium::Way& way) {
if (_firstPassDone || (!_config.addUntaggedWays && way.tags().empty())) {
return;
}
_numWays++;
}

// ____________________________________________________________________________
size_t osm2rdf::osm::CountHandler::numNodes() const {
return _numNodes;
}
size_t osm2rdf::osm::CountHandler::numNodes() const { return _numNodes; }

// ____________________________________________________________________________
// Returns the current value of the relation counter (_numRelations).
size_t osm2rdf::osm::CountHandler::numRelations() const {
return _numRelations;
}

// ____________________________________________________________________________
size_t osm2rdf::osm::CountHandler::numWays() const {
return _numWays;
}
size_t osm2rdf::osm::CountHandler::numWays() const { return _numWays; }
21 changes: 20 additions & 1 deletion src/osm/OsmiumHandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ void osm2rdf::osm::OsmiumHandler<W>::handle() {
assembler_config.create_empty_areas = false;
osmium::area::MultipolygonManager<osmium::area::Assembler> mp_manager{
assembler_config};
osm2rdf::osm::CountHandler countHandler;
osm2rdf::osm::CountHandler countHandler(_config);

// read relations for areas
{
Expand Down Expand Up @@ -163,6 +163,11 @@ void osm2rdf::osm::OsmiumHandler<W>::handle() {
template <typename W>
void osm2rdf::osm::OsmiumHandler<W>::area(const osmium::Area& area) {
_areasSeen++;

if (!_config.addUntaggedAreas && area.tags().empty()) {
return;
}

try {
auto osmArea = osm2rdf::osm::Area(area);
#pragma omp task
Expand All @@ -187,6 +192,10 @@ template <typename W>
void osm2rdf::osm::OsmiumHandler<W>::node(const osmium::Node& node) {
_nodesSeen++;

if (!_config.addUntaggedNodes && node.tags().empty()) {
return;
}

try {
const auto& osmNode = osm2rdf::osm::Node(node);
#pragma omp task
Expand Down Expand Up @@ -224,6 +233,11 @@ template <typename W>
void osm2rdf::osm::OsmiumHandler<W>::relation(
const osmium::Relation& relation) {
_relationsSeen++;

if (!_config.addUntaggedRelations && relation.tags().empty()) {
return;
}

try {
// only task this away if we actually build the relation geometries,
// otherwise this just adds multithreading overhead for nothing
Expand Down Expand Up @@ -267,6 +281,11 @@ void osm2rdf::osm::OsmiumHandler<W>::relation(
template <typename W>
void osm2rdf::osm::OsmiumHandler<W>::way(const osmium::Way& way) {
_waysSeen++;

if (!_config.addUntaggedWays && way.tags().empty()) {
return;
}

try {
auto osmWay = osm2rdf::osm::Way(way);

Expand Down

0 comments on commit d7db303

Please sign in to comment.