Fix precision (#951)

pkufool · web-flow · commit 1b29f0a946f5 · 2022-04-13T08:46:49.000+08:00
* Fix precision

* Use different pow versions for Windows and *nix

* Use int64_t pow

* Minor fixes
diff --git a/k2/csrc/math.h b/k2/csrc/math.h
@@ -27,6 +27,20 @@
 
 namespace k2 {
 
+// Returns base^exponent using integer multiplication; exponent must be >= 0.
+// Currently, only used in k2/csrc/rnnt_decode.cu
+// See https://github.com/k2-fsa/k2/pull/951#issuecomment-1096650842
+__host__ __device__ __forceinline__ int64_t Pow(int64_t base,
+                                                int64_t exponent) {
+  K2_CHECK_GE(exponent, 0);
+  // Repeated multiplication; overflow of int64_t is not detected.
+  int64_t product = 1;
+  for (int64_t i = 0; i < exponent; ++i) {
+    product *= base;
+  }
+  return product;
+}
+
 /*
   Returns index of highest bit set, in range -1..30.
   HighestBitSet(0) = -1,
@@ -106,29 +120,29 @@ int32_t RandIntGeometric(int32_t min, int32_t max);
  type, but for types float and double it "fixes" the broken behavior of
  the C++ standard w.r.t. infinity allowing infinities to be parsed.
 */
-template<class T> struct InputFixer {
+template <class T>
+struct InputFixer {
   T t;
   // cast operator
   operator T() const { return t; }
 };
 
-
 namespace internal {
 template <typename Real>
 Real FixedRead(std::istream &is);
 }
 
 template <typename T>
-inline std::istream &operator >>(std::istream &is, InputFixer<T> &f) {
+inline std::istream &operator>>(std::istream &is, InputFixer<T> &f) {
   return is >> f.t;
 }
 template <>
-inline std::istream &operator >>(std::istream &is, InputFixer<float> &f) {
+inline std::istream &operator>>(std::istream &is, InputFixer<float> &f) {
   f.t = internal::FixedRead<float>(is);
   return is;
 }
 template <>
-inline std::istream &operator >>(std::istream &is, InputFixer<double> &f) {
+inline std::istream &operator>>(std::istream &is, InputFixer<double> &f) {
   f.t = internal::FixedRead<double>(is);
   return is;
 }
diff --git a/k2/csrc/rnnt_decode.cu b/k2/csrc/rnnt_decode.cu
@@ -159,8 +159,8 @@ void RnntDecodingStreams::GetContexts(RaggedShape *shape,
         int64_t state_value = states_values_data[state_idx01x],
                 context_state = state_value / num_graph_states,
                 exp = decoder_history_len - col,
-                state = context_state % (int64_t)powf(vocab_size, exp);
-        state = state / (int64_t)powf(vocab_size, exp - 1);
+                state = context_state % Pow(vocab_size, exp);
+        state = state / Pow(vocab_size, exp - 1);
         contexts_acc(row, col) = state;
       });
 }
@@ -540,7 +540,7 @@ void RnntDecodingStreams::Advance(const Array2<float> &logprobs) {
           // can be done with `358 % 10^2`, then we append 6 to 58, that can be
           // done with `58 * 10 + 6`.
           context_state = this_context_state %
-                          (int64_t)powf(vocab_size, decoder_history_len - 1);
+                          Pow(vocab_size, decoder_history_len - 1);
           context_state = context_state * vocab_size + arc.label;
         }