From 733cb505bc7353635ee02fdddc7eb9b6465d976b Mon Sep 17 00:00:00 2001 From: Roman Grundkiewicz Date: Sun, 12 Apr 2020 18:56:11 +0100 Subject: [PATCH] Support relative paths in shortlist and sqlite options (#612) * Refactorize processPaths * Fix relative paths for shortlist and sqlite options * Rename InterpolateEnvVars to interpolateEnvVars * Update CHANGELOG --- CHANGELOG.md | 3 +- src/common/cli_helper.h | 57 +++++++++++++++++------------------- src/common/config_parser.cpp | 22 +++++++------- 3 files changed, 39 insertions(+), 43 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 40bee3d70..df1b05c79 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,8 +9,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] ### Added +- Supporting relative paths in shortlist and sqlite options - Training and scoring from STDIN -- Support for reading from TSV files from STDIN and other sources during training +- Support for reading from TSV files from STDIN and other sources during training and translation with options --tsv and --tsv-fields n. ### Fixed diff --git a/src/common/cli_helper.h b/src/common/cli_helper.h index 4477f0c02..dc8eafdf6 100644 --- a/src/common/cli_helper.h +++ b/src/common/cli_helper.h @@ -10,7 +10,7 @@ namespace cli { // helper to replace environment-variable expressions of the form ${VARNAME} in // a string -static inline std::string InterpolateEnvVars(std::string str) { +static inline std::string interpolateEnvVars(std::string str) { // temporary workaround for MS-internal PhillyOnAzure cluster: warm storage // presently has the form /hdfs/VC instead of /{gfs,hdfs}/CLUSTER/VC @@ -58,43 +58,40 @@ static inline std::string InterpolateEnvVars(std::string str) { } } -// helper to implement interpolate-env-vars and relative-paths options +// Helper to implement interpolate-env-vars and relative-paths options static inline void processPaths( YAML::Node& node, const std::function& TransformPath, const std::set& PATHS, - bool isPath = false) { - if(isPath) { - if(node.Type() == YAML::NodeType::Scalar) { - std::string nodePath = node.as(); - // transform the path - if(!nodePath.empty()) - node = TransformPath(nodePath); - } + bool isPath = false, + const std::string parentKey = "") { + // For a scalar node (leaves in the config), just transform the path + if(isPath && node.IsScalar()) { + std::string nodePath = node.as(); + if(!nodePath.empty()) + node = TransformPath(nodePath); + } + // For a sequence node, recursively iterate each value + else if(node.IsSequence()) { + for(auto&& sub : node) { + processPaths(sub, TransformPath, PATHS, isPath); - if(node.Type() == YAML::NodeType::Sequence) { - for(auto&& sub : node) { - processPaths(sub, TransformPath, PATHS, true); - } - } - } else { - switch(node.Type()) { - case YAML::NodeType::Sequence: - for(auto&& sub : node) { - processPaths(sub, TransformPath, PATHS, false); - } - break; - case YAML::NodeType::Map: - for(auto&& sub : node) { - std::string key = sub.first.as(); - processPaths(sub.second, TransformPath, PATHS, PATHS.count(key) > 0); - } - break; - default: - // it is OK + // Exception for the shortlist option, which keeps a path and three numbers; + // we want to process the path only and keep the rest untouched + if(isPath && parentKey == "shortlist") break; } } + // For a map node that is not a path, recursively iterate each value + else if(!isPath && node.IsMap()) { + for(auto&& sub : node) { + std::string key = sub.first.as(); + // Exception for the sqlite option, which has a special value of 'temporary' + if(key == "sqlite" && sub.second.as() == "temporary") + continue; + processPaths(sub.second, TransformPath, PATHS, PATHS.count(key) > 0, key); + } + } } // helper to convert a YAML node recursively into a string diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp index 05a6ccb1d..2f56d8870 100755 --- a/src/common/config_parser.cpp +++ b/src/common/config_parser.cpp @@ -39,16 +39,14 @@ const std::set PATHS = { "valid-script-args", "valid-log", "valid-translation-output", - "input", // except: stdin - "output", // except: stdout + "input", // except: 'stdin', handled in makeAbsolutePaths and interpolateEnvVars + "output", // except: 'stdout', handled in makeAbsolutePaths and interpolateEnvVars "pretrained-model", "data-weighting", - "log" - // TODO: Handle the special value in helper functions - //"sqlite", // except: temporary - // TODO: This is a vector with a path and some numbers, handle this in helper - // functions or separate shortlist path to a separate command-line option - //"shortlist", + "log", + "sqlite", // except: 'temporary', handled in the processPaths function + "shortlist", // except: only the first element in the sequence is a path, handled in the + // processPaths function }; // clang-format on @@ -876,7 +874,7 @@ Ptr ConfigParser::parseOptions(int argc, char** argv, bool doValidate){ } if(get("interpolate-env-vars")) { - cli::processPaths(config_, cli::InterpolateEnvVars, PATHS); + cli::processPaths(config_, cli::interpolateEnvVars, PATHS); } // Option shortcuts for input from STDIN for trainer and scorer @@ -931,12 +929,12 @@ std::vector ConfigParser::findConfigPaths() { for(auto& path : paths) { // (note: this updates the paths array) if(interpolateEnvVars) - path = cli::InterpolateEnvVars(path); + path = cli::interpolateEnvVars(path); } } else if(mode_ == cli::mode::training) { auto path = config_["model"].as() + ".yml"; if(interpolateEnvVars) - path = cli::InterpolateEnvVars(path); + path = cli::interpolateEnvVars(path); bool reloadConfig = filesystem::exists(path) && !get("no-reload"); if(reloadConfig) @@ -962,7 +960,7 @@ YAML::Node ConfigParser::loadConfigFiles(const std::vector& paths) && config["interpolate-env-vars"].as()) || get("interpolate-env-vars"); if(interpolateEnvVars) - cli::processPaths(config, cli::InterpolateEnvVars, PATHS); + cli::processPaths(config, cli::interpolateEnvVars, PATHS); // replace relative path w.r.t. the config file cli::makeAbsolutePaths(config, path, PATHS);