Skip to content

Commit

Permalink
Support relative paths in shortlist and sqlite options (#612)
Browse files Browse the repository at this point in the history
* Refactorize processPaths
* Fix relative paths for shortlist and sqlite options
* Rename InterpolateEnvVars to interpolateEnvVars
* Update CHANGELOG
  • Loading branch information
snukky authored and ugermann committed May 20, 2020
1 parent 93a27dc commit 733cb50
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 43 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
## [Unreleased]

### Added
- Supporting relative paths in shortlist and sqlite options
- Training and scoring from STDIN
- Support for reading from TSV files from STDIN and other sources during training
- Support for reading from TSV files from STDIN and other sources during training
and translation with options --tsv and --tsv-fields n.

### Fixed
Expand Down
57 changes: 27 additions & 30 deletions src/common/cli_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ namespace cli {

// helper to replace environment-variable expressions of the form ${VARNAME} in
// a string
static inline std::string InterpolateEnvVars(std::string str) {
static inline std::string interpolateEnvVars(std::string str) {
// temporary workaround for MS-internal PhillyOnAzure cluster: warm storage
// presently has the form /hdfs/VC instead of /{gfs,hdfs}/CLUSTER/VC

Expand Down Expand Up @@ -58,43 +58,40 @@ static inline std::string InterpolateEnvVars(std::string str) {
}
}

// helper to implement interpolate-env-vars and relative-paths options
// Helper to implement interpolate-env-vars and relative-paths options
static inline void processPaths(
YAML::Node& node,
const std::function<std::string(std::string)>& TransformPath,
const std::set<std::string>& PATHS,
bool isPath = false) {
if(isPath) {
if(node.Type() == YAML::NodeType::Scalar) {
std::string nodePath = node.as<std::string>();
// transform the path
if(!nodePath.empty())
node = TransformPath(nodePath);
}
bool isPath = false,
const std::string parentKey = "") {
// For a scalar node (leaves in the config), just transform the path
if(isPath && node.IsScalar()) {
std::string nodePath = node.as<std::string>();
if(!nodePath.empty())
node = TransformPath(nodePath);
}
// For a sequence node, recursively iterate each value
else if(node.IsSequence()) {
for(auto&& sub : node) {
processPaths(sub, TransformPath, PATHS, isPath);

if(node.Type() == YAML::NodeType::Sequence) {
for(auto&& sub : node) {
processPaths(sub, TransformPath, PATHS, true);
}
}
} else {
switch(node.Type()) {
case YAML::NodeType::Sequence:
for(auto&& sub : node) {
processPaths(sub, TransformPath, PATHS, false);
}
break;
case YAML::NodeType::Map:
for(auto&& sub : node) {
std::string key = sub.first.as<std::string>();
processPaths(sub.second, TransformPath, PATHS, PATHS.count(key) > 0);
}
break;
default:
// it is OK
// Exception for the shortlist option, which keeps a path and three numbers;
// we want to process the path only and keep the rest untouched
if(isPath && parentKey == "shortlist")
break;
}
}
// For a map node that is not a path, recursively iterate each value
else if(!isPath && node.IsMap()) {
for(auto&& sub : node) {
std::string key = sub.first.as<std::string>();
// Exception for the sqlite option, which has a special value of 'temporary'
if(key == "sqlite" && sub.second.as<std::string>() == "temporary")
continue;
processPaths(sub.second, TransformPath, PATHS, PATHS.count(key) > 0, key);
}
}
}

// helper to convert a YAML node recursively into a string
Expand Down
22 changes: 10 additions & 12 deletions src/common/config_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,14 @@ const std::set<std::string> PATHS = {
"valid-script-args",
"valid-log",
"valid-translation-output",
"input", // except: stdin
"output", // except: stdout
"input", // except: 'stdin', handled in makeAbsolutePaths and interpolateEnvVars
"output", // except: 'stdout', handled in makeAbsolutePaths and interpolateEnvVars
"pretrained-model",
"data-weighting",
"log"
// TODO: Handle the special value in helper functions
//"sqlite", // except: temporary
// TODO: This is a vector with a path and some numbers, handle this in helper
// functions or separate shortlist path to a separate command-line option
//"shortlist",
"log",
"sqlite", // except: 'temporary', handled in the processPaths function
"shortlist", // except: only the first element in the sequence is a path, handled in the
// processPaths function
};
// clang-format on

Expand Down Expand Up @@ -876,7 +874,7 @@ Ptr<Options> ConfigParser::parseOptions(int argc, char** argv, bool doValidate){
}

if(get<bool>("interpolate-env-vars")) {
cli::processPaths(config_, cli::InterpolateEnvVars, PATHS);
cli::processPaths(config_, cli::interpolateEnvVars, PATHS);
}

// Option shortcuts for input from STDIN for trainer and scorer
Expand Down Expand Up @@ -931,12 +929,12 @@ std::vector<std::string> ConfigParser::findConfigPaths() {
for(auto& path : paths) {
// (note: this updates the paths array)
if(interpolateEnvVars)
path = cli::InterpolateEnvVars(path);
path = cli::interpolateEnvVars(path);
}
} else if(mode_ == cli::mode::training) {
auto path = config_["model"].as<std::string>() + ".yml";
if(interpolateEnvVars)
path = cli::InterpolateEnvVars(path);
path = cli::interpolateEnvVars(path);

bool reloadConfig = filesystem::exists(path) && !get<bool>("no-reload");
if(reloadConfig)
Expand All @@ -962,7 +960,7 @@ YAML::Node ConfigParser::loadConfigFiles(const std::vector<std::string>& paths)
&& config["interpolate-env-vars"].as<bool>())
|| get<bool>("interpolate-env-vars");
if(interpolateEnvVars)
cli::processPaths(config, cli::InterpolateEnvVars, PATHS);
cli::processPaths(config, cli::interpolateEnvVars, PATHS);

// replace relative path w.r.t. the config file
cli::makeAbsolutePaths(config, path, PATHS);
Expand Down

0 comments on commit 733cb50

Please sign in to comment.