|
| 1 | +/** |
| 2 | + * Copyright 2022 Xiaomi Corporation (authors: Fangjun Kuang) |
| 3 | + * |
| 4 | + * See LICENSE for clarification regarding multiple authors |
| 5 | + * |
| 6 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 7 | + * you may not use this file except in compliance with the License. |
| 8 | + * You may obtain a copy of the License at |
| 9 | + * |
| 10 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | + * |
| 12 | + * Unless required by applicable law or agreed to in writing, software |
| 13 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 14 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 15 | + * See the License for the specific language governing permissions and |
| 16 | + * limitations under the License. |
| 17 | + */ |
| 18 | + |
#include <cstdint>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "k2/csrc/log.h"
#include "k2/torch/csrc/beam_search.h"
#include "k2/torch/csrc/features.h"
#include "k2/torch/csrc/parse_options.h"
#include "k2/torch/csrc/wave_reader.h"
#include "sentencepiece_processor.h"  // NOLINT
#include "torch/all.h"
| 26 | + |
// Help text shown by k2::ParseOptions (e.g. when --help is given or
// validation fails). Keep the example command line in sync with the
// options registered in main().
static constexpr const char *kUsageMessage = R"(
This file implements RNN-T decoding for pruned stateless transducer models
that are trained using pruned_transducer_statelessX (X>=2) from icefall.

Usage:
  ./bin/pruned_stateless_transducer --help

  ./bin/pruned_stateless_transducer \
    --nn-model=/path/to/cpu_jit.pt \
    --bpe-model=/path/to/bpe.model \
    --use-gpu=true \
    --decoding-method=modified_beam_search \
    /path/to/foo.wav \
    /path/to/bar.wav
)";
| 42 | + |
| 43 | +static void RegisterFrameExtractionOptions( |
| 44 | + k2::ParseOptions *po, kaldifeat::FrameExtractionOptions *opts) { |
| 45 | + po->Register("sample-frequency", &opts->samp_freq, |
| 46 | + "Waveform data sample frequency (must match the waveform file, " |
| 47 | + "if specified there)"); |
| 48 | + |
| 49 | + po->Register("frame-length", &opts->frame_length_ms, |
| 50 | + "Frame length in milliseconds"); |
| 51 | + |
| 52 | + po->Register("frame-shift", &opts->frame_shift_ms, |
| 53 | + "Frame shift in milliseconds"); |
| 54 | + |
| 55 | + po->Register("dither", &opts->dither, |
| 56 | + "Dithering constant (0.0 means no dither)."); |
| 57 | +} |
| 58 | + |
| 59 | +static void RegisterMelBanksOptions(k2::ParseOptions *po, |
| 60 | + kaldifeat::MelBanksOptions *opts) { |
| 61 | + po->Register("num-mel-bins", &opts->num_bins, |
| 62 | + "Number of triangular mel-frequency bins"); |
| 63 | +} |
| 64 | + |
| 65 | +int main(int argc, char *argv[]) { |
| 66 | + // see |
| 67 | + // https://pytorch.org/docs/stable/notes/cpu_threading_torchscript_inference.html |
| 68 | + torch::set_num_threads(1); |
| 69 | + torch::set_num_interop_threads(1); |
| 70 | + torch::NoGradGuard no_grad; |
| 71 | + |
| 72 | + k2::ParseOptions po(kUsageMessage); |
| 73 | + |
| 74 | + std::string nn_model; // path to the torch jit model file |
| 75 | + std::string bpe_model; // path to the BPE model file |
| 76 | + bool use_gpu = false; // true to use GPU for decoding; false to use CPU. |
| 77 | + std::string decoding_method = "greedy_search"; // Supported methods are: |
| 78 | + // greedy_search, |
| 79 | + // modified_beam_search |
| 80 | + |
| 81 | + kaldifeat::FbankOptions fbank_opts; |
| 82 | + fbank_opts.frame_opts.dither = 0; |
| 83 | + RegisterFrameExtractionOptions(&po, &fbank_opts.frame_opts); |
| 84 | + fbank_opts.mel_opts.num_bins = 80; |
| 85 | + RegisterMelBanksOptions(&po, &fbank_opts.mel_opts); |
| 86 | + |
| 87 | + po.Register("nn-model", &nn_model, "Path to the torch jit model file"); |
| 88 | + |
| 89 | + po.Register("bpe-model", &bpe_model, "Path to the BPE model file"); |
| 90 | + |
| 91 | + po.Register("use-gpu", &use_gpu, |
| 92 | + "true to use GPU for decoding; false to use CPU. " |
| 93 | + "If GPU is enabled, it always uses GPU 0. You can use " |
| 94 | + "the environment variable CUDA_VISIBLE_DEVICES to control " |
| 95 | + "which GPU device to use."); |
| 96 | + |
| 97 | + po.Register( |
| 98 | + "decoding-method", &decoding_method, |
| 99 | + "Decoding method to use." |
| 100 | + "Currently implemented methods are: greedy_search, modified_beam_search"); |
| 101 | + |
| 102 | + po.Read(argc, argv); |
| 103 | + |
| 104 | + K2_CHECK(decoding_method == "greedy_search" || |
| 105 | + decoding_method == "modified_beam_search") |
| 106 | + << "Currently supported decoding methods are: " |
| 107 | + "greedy_search, modified_beam_search. " |
| 108 | + << "Given: " << decoding_method; |
| 109 | + |
| 110 | + torch::Device device(torch::kCPU); |
| 111 | + if (use_gpu) { |
| 112 | + K2_LOG(INFO) << "Use GPU"; |
| 113 | + device = torch::Device(torch::kCUDA, 0); |
| 114 | + } |
| 115 | + |
| 116 | + K2_LOG(INFO) << "Device: " << device; |
| 117 | + |
| 118 | + int32_t num_waves = po.NumArgs(); |
| 119 | + K2_CHECK_GT(num_waves, 0) << "Please provide at least one wave file"; |
| 120 | + |
| 121 | + std::vector<std::string> wave_filenames(num_waves); |
| 122 | + for (int32_t i = 0; i < num_waves; ++i) { |
| 123 | + wave_filenames[i] = po.GetArg(i + 1); |
| 124 | + } |
| 125 | + |
| 126 | + K2_LOG(INFO) << "Loading wave files"; |
| 127 | + std::vector<torch::Tensor> wave_data = |
| 128 | + k2::ReadWave(wave_filenames, fbank_opts.frame_opts.samp_freq); |
| 129 | + for (auto &w : wave_data) { |
| 130 | + w = w.to(device); |
| 131 | + } |
| 132 | + |
| 133 | + fbank_opts.device = device; |
| 134 | + |
| 135 | + kaldifeat::Fbank fbank(fbank_opts); |
| 136 | + |
| 137 | + K2_LOG(INFO) << "Computing features"; |
| 138 | + std::vector<int64_t> num_frames; |
| 139 | + std::vector<torch::Tensor> features_vec = |
| 140 | + k2::ComputeFeatures(fbank, wave_data, &num_frames); |
| 141 | + |
| 142 | + // Note: math.log(1e-10) is -23.025850929940457 |
| 143 | + torch::Tensor features = torch::nn::utils::rnn::pad_sequence( |
| 144 | + features_vec, /*batch_first*/ true, |
| 145 | + /*padding_value*/ -23.025850929940457f); |
| 146 | + torch::Tensor feature_lens = torch::tensor(num_frames, device); |
| 147 | + |
| 148 | + K2_LOG(INFO) << "Loading neural network model from " << nn_model; |
| 149 | + torch::jit::Module module = torch::jit::load(nn_model); |
| 150 | + module.eval(); |
| 151 | + module.to(device); |
| 152 | + |
| 153 | + K2_LOG(INFO) << "Computing output of the encoder network"; |
| 154 | + |
| 155 | + auto outputs = module.attr("encoder") |
| 156 | + .toModule() |
| 157 | + .run_method("forward", features, feature_lens) |
| 158 | + .toTuple(); |
| 159 | + assert(outputs->elements().size() == 2u); |
| 160 | + |
| 161 | + auto encoder_out = outputs->elements()[0].toTensor(); |
| 162 | + auto encoder_out_lens = outputs->elements()[1].toTensor(); |
| 163 | + |
| 164 | + K2_LOG(INFO) << "Using " << decoding_method; |
| 165 | + |
| 166 | + std::vector<std::vector<int32_t>> hyp_tokens; |
| 167 | + if (decoding_method == "greedy_search") { |
| 168 | + hyp_tokens = k2::GreedySearch(module, encoder_out, encoder_out_lens.cpu()); |
| 169 | + } else { |
| 170 | + hyp_tokens = |
| 171 | + k2::ModifiedBeamSearch(module, encoder_out, encoder_out_lens.cpu()); |
| 172 | + } |
| 173 | + |
| 174 | + sentencepiece::SentencePieceProcessor processor; |
| 175 | + auto status = processor.Load(bpe_model); |
| 176 | + K2_CHECK(status.ok()) << status.ToString(); |
| 177 | + |
| 178 | + std::vector<std::string> texts; |
| 179 | + for (const auto &ids : hyp_tokens) { |
| 180 | + std::string text; |
| 181 | + status = processor.Decode(ids, &text); |
| 182 | + K2_CHECK(status.ok()) << status.ToString(); |
| 183 | + texts.emplace_back(std::move(text)); |
| 184 | + } |
| 185 | + |
| 186 | + std::ostringstream os; |
| 187 | + os << "\nDecoding result:\n\n"; |
| 188 | + for (int32_t i = 0; i != num_waves; ++i) { |
| 189 | + os << wave_filenames[i] << "\n"; |
| 190 | + os << texts[i]; |
| 191 | + os << "\n\n"; |
| 192 | + } |
| 193 | + K2_LOG(INFO) << os.str(); |
| 194 | +}; |
0 commit comments