diff --git a/CMakeLists.txt b/CMakeLists.txt
index 70554ad1b..f315d7cf6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -253,7 +253,7 @@ if(BUILD_JPIP_SERVER)
 endif()
 add_subdirectory(src/lib)
 option(BUILD_LUTS_GENERATOR "Build utility to generate t1_luts.h" OFF)
-option(BUILD_BENCH_DWT "Build bench_dwt utility (development benchmark)" OFF)
+option(BUILD_UNIT_TESTS "Build unit tests (bench_dwt, test_sparse_array, etc..)" OFF)
 
 #-----------------------------------------------------------------------------
 # Build Applications
diff --git a/src/bin/jp2/opj_compress.c b/src/bin/jp2/opj_compress.c
index 494b366a7..ab07fbb50 100644
--- a/src/bin/jp2/opj_compress.c
+++ b/src/bin/jp2/opj_compress.c
@@ -907,8 +907,9 @@ static int parse_cmdline_encoder(int argc, char **argv,
         case 'b': {         /* code-block dimension */
             int cblockw_init = 0, cblockh_init = 0;
             sscanf(opj_optarg, "%d,%d", &cblockw_init, &cblockh_init);
-            if (cblockw_init * cblockh_init > 4096 || cblockw_init > 1024
-                    || cblockw_init < 4 || cblockh_init > 1024 || cblockh_init < 4) {
+            if (cblockw_init > 1024 || cblockw_init < 4 ||
+                    cblockh_init > 1024 || cblockh_init < 4 ||
+                    cblockw_init * cblockh_init > 4096) {
                 fprintf(stderr,
                         "!! Size of code_block error (option -b) !!\n\nRestriction :\n"
                         "    * width*height<=4096\n    * 4<=width,height<= 1024\n\n");
diff --git a/src/lib/openjp2/CMakeLists.txt b/src/lib/openjp2/CMakeLists.txt
index 57c1751ef..697b07ea2 100644
--- a/src/lib/openjp2/CMakeLists.txt
+++ b/src/lib/openjp2/CMakeLists.txt
@@ -54,6 +54,8 @@ set(OPENJPEG_SRCS
   ${CMAKE_CURRENT_SOURCE_DIR}/opj_malloc.c
   ${CMAKE_CURRENT_SOURCE_DIR}/opj_malloc.h
   ${CMAKE_CURRENT_SOURCE_DIR}/opj_stdint.h
+  ${CMAKE_CURRENT_SOURCE_DIR}/sparse_array.c
+  ${CMAKE_CURRENT_SOURCE_DIR}/sparse_array.h
 )
 if(BUILD_JPIP)
   add_definitions(-DUSE_JPIP)
@@ -192,12 +194,23 @@ if(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT)
    TARGET_LINK_LIBRARIES(${OPENJPEG_LIBRARY_NAME} ${CMAKE_THREAD_LIBS_INIT})
 endif(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT)
 
-if(BUILD_BENCH_DWT)
-    add_executable(bench_dwt bench_dwt.c dwt.c opj_malloc.c thread.c)
+# Development-only executables. Each one is built from a single source file and
+# takes every other symbol from ${OPENJPEG_LIBRARY_NAME}, which is only linked
+# on UNIX below, so the targets are not declared at all on other platforms.
+if(BUILD_UNIT_TESTS AND UNIX)
+    add_executable(bench_dwt bench_dwt.c)
     if(UNIX)
-        target_link_libraries(bench_dwt m)
+        target_link_libraries(bench_dwt m ${OPENJPEG_LIBRARY_NAME})
     endif()
     if(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT)
         target_link_libraries(bench_dwt ${CMAKE_THREAD_LIBS_INIT})
     endif(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT)
-endif(BUILD_BENCH_DWT)
+
+    add_executable(test_sparse_array test_sparse_array.c)
+    if(UNIX)
+        target_link_libraries(test_sparse_array m ${OPENJPEG_LIBRARY_NAME})
+    endif()
+    if(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT)
+        target_link_libraries(test_sparse_array ${CMAKE_THREAD_LIBS_INIT})
+    endif(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT)
+endif()
diff --git a/src/lib/openjp2/bench_dwt.c b/src/lib/openjp2/bench_dwt.c
index 36f4c0c9a..0dc278f3e 100644
--- a/src/lib/openjp2/bench_dwt.c
+++ b/src/lib/openjp2/bench_dwt.c
@@ -198,10 +198,11 @@ int main(int argc, char** argv)
 
     memset(&tcd, 0, sizeof(tcd));
     tcd.thread_pool = tp;
-    tcd.decoded_x0 = (OPJ_UINT32)tilec.x0;
-    tcd.decoded_y0 = (OPJ_UINT32)tilec.y0;
-    tcd.decoded_x1 = (OPJ_UINT32)tilec.x1;
-    tcd.decoded_y1 = (OPJ_UINT32)tilec.y1;
+    tcd.whole_tile_decoding = OPJ_TRUE;
+    tcd.win_x0 = (OPJ_UINT32)tilec.x0;
+    tcd.win_y0 = (OPJ_UINT32)tilec.y0;
+    tcd.win_x1 = (OPJ_UINT32)tilec.x1;
+    tcd.win_y1 = (OPJ_UINT32)tilec.y1;
     tcd.tcd_image = &tcd_image;
     memset(&tcd_image, 0, sizeof(tcd_image));
     tcd_image.tiles = &tcd_tile;
diff --git a/src/lib/openjp2/dwt.c b/src/lib/openjp2/dwt.c
index 7377b6429..18270045f 100644
--- a/src/lib/openjp2/dwt.c
+++ b/src/lib/openjp2/dwt.c
@@ -151,9 +151,9 @@ Inverse wavelet transform in 2-D.
 static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp,
                                     opj_tcd_tilecomp_t* tilec, OPJ_UINT32 i);
 
-static OPJ_BOOL opj_dwt_decode_partial_tile(opj_tcd_t *p_tcd,
-        opj_tcd_tilecomp_t* tilec,
-        OPJ_UINT32 numres);
+static OPJ_BOOL opj_dwt_decode_partial_tile(
+    opj_tcd_tilecomp_t* tilec,
+    OPJ_UINT32 numres);
 
 static OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec,
         void (*p_function)(OPJ_INT32 *, OPJ_INT32, OPJ_INT32, OPJ_INT32));
@@ -279,7 +279,7 @@ static void opj_dwt_deinterleave_v(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn,
         l_src += 2;
     } /* b[i*x]=a[2*i+cas]; */
 
-    l_dest = b + sn * x;
+    l_dest = b + (OPJ_SIZE_T)sn * (OPJ_SIZE_T)x;
     l_src = a + 1 - cas;
 
     i = dn;
@@ -325,7 +325,7 @@ static void opj_dwt_interleave_v(const opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x)
         bi += 2;
         ai += x;
     }
-    ai = a + (v->sn * x);
+    ai = a + (v->sn * (OPJ_SIZE_T)x);
     bi = v->mem + 1 - v->cas;
     i = v->dn ;
     while (i--) {
@@ -616,7 +616,7 @@ static
 void opj_idwt53_v_final_memcpy(OPJ_INT32* tiledp_col,
                                const OPJ_INT32* tmp,
                                OPJ_INT32 len,
-                               OPJ_INT32 stride)
+                               OPJ_SIZE_T stride)
 {
     OPJ_INT32 i;
     for (i = 0; i < len; ++i) {
@@ -625,9 +625,9 @@ void opj_idwt53_v_final_memcpy(OPJ_INT32* tiledp_col,
                     PARALLEL_COLS_53 * sizeof(OPJ_INT32))
            would do but would be a tiny bit slower.
            We can take here advantage of our knowledge of alignment */
-        STOREU(&tiledp_col[i * stride + 0],
+        STOREU(&tiledp_col[(OPJ_SIZE_T)i * stride + 0],
                LOAD(&tmp[PARALLEL_COLS_53 * i + 0]));
-        STOREU(&tiledp_col[i * stride + VREG_INT_COUNT],
+        STOREU(&tiledp_col[(OPJ_SIZE_T)i * stride + VREG_INT_COUNT],
                LOAD(&tmp[PARALLEL_COLS_53 * i + VREG_INT_COUNT]));
     }
 }
@@ -639,12 +639,13 @@ static void opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2(
     const OPJ_INT32 sn,
     const OPJ_INT32 len,
     OPJ_INT32* tiledp_col,
-    const OPJ_INT32 stride)
+    const OPJ_SIZE_T stride)
 {
     const OPJ_INT32* in_even = &tiledp_col[0];
-    const OPJ_INT32* in_odd = &tiledp_col[sn * stride];
+    const OPJ_INT32* in_odd = &tiledp_col[(OPJ_SIZE_T)sn * stride];
 
-    OPJ_INT32 i, j;
+    OPJ_INT32 i;
+    OPJ_SIZE_T j;
     VREG d1c_0, d1n_0, s1n_0, s0c_0, s0n_0;
     VREG d1c_1, d1n_1, s1n_1, s0c_1, s0n_1;
     const VREG two = LOAD_CST(2);
@@ -661,7 +662,7 @@ static void opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2(
     /* Note: loads of input even/odd values must be done in a unaligned */
     /* fashion. But stores in tmp can be done with aligned store, since */
     /* the temporary buffer is properly aligned */
-    assert((size_t)tmp % (sizeof(OPJ_INT32) * VREG_INT_COUNT) == 0);
+    assert((OPJ_SIZE_T)tmp % (sizeof(OPJ_INT32) * VREG_INT_COUNT) == 0);
 
     s1n_0 = LOADU(in_even + 0);
     s1n_1 = LOADU(in_even + VREG_INT_COUNT);
@@ -703,7 +704,7 @@ static void opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2(
 
     if (len & 1) {
         VREG tmp_len_minus_1;
-        s1n_0 = LOADU(in_even + ((len - 1) / 2) * stride);
+        s1n_0 = LOADU(in_even + (OPJ_SIZE_T)((len - 1) / 2) * stride);
         /* tmp_len_minus_1 = s1n - ((d1n + 1) >> 1); */
         tmp_len_minus_1 = SUB(s1n_0, SAR(ADD3(d1n_0, d1n_0, two), 2));
         STORE(tmp + PARALLEL_COLS_53 * (len - 1), tmp_len_minus_1);
@@ -711,7 +712,7 @@ static void opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2(
         STORE(tmp + PARALLEL_COLS_53 * (len - 2),
               ADD(d1n_0, SAR(ADD(s0n_0, tmp_len_minus_1), 1)));
 
-        s1n_1 = LOADU(in_even + ((len - 1) / 2) * stride + VREG_INT_COUNT);
+        s1n_1 = LOADU(in_even + (OPJ_SIZE_T)((len - 1) / 2) * stride + VREG_INT_COUNT);
         /* tmp_len_minus_1 = s1n - ((d1n + 1) >> 1); */
         tmp_len_minus_1 = SUB(s1n_1, SAR(ADD3(d1n_1, d1n_1, two), 2));
         STORE(tmp + PARALLEL_COLS_53 * (len - 1) + VREG_INT_COUNT,
@@ -739,15 +740,16 @@ static void opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2(
     const OPJ_INT32 sn,
     const OPJ_INT32 len,
     OPJ_INT32* tiledp_col,
-    const OPJ_INT32 stride)
+    const OPJ_SIZE_T stride)
 {
-    OPJ_INT32 i, j;
+    OPJ_INT32 i;
+    OPJ_SIZE_T j;
 
     VREG s1_0, s2_0, dc_0, dn_0;
     VREG s1_1, s2_1, dc_1, dn_1;
     const VREG two = LOAD_CST(2);
 
-    const OPJ_INT32* in_even = &tiledp_col[sn * stride];
+    const OPJ_INT32* in_even = &tiledp_col[(OPJ_SIZE_T)sn * stride];
     const OPJ_INT32* in_odd = &tiledp_col[0];
 
     assert(len > 2);
@@ -762,7 +764,7 @@ static void opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2(
     /* Note: loads of input even/odd values must be done in a unaligned */
     /* fashion. But stores in tmp can be done with aligned store, since */
     /* the temporary buffer is properly aligned */
-    assert((size_t)tmp % (sizeof(OPJ_INT32) * VREG_INT_COUNT) == 0);
+    assert((OPJ_SIZE_T)tmp % (sizeof(OPJ_INT32) * VREG_INT_COUNT) == 0);
 
     s1_0 = LOADU(in_even + stride);
     /* in_odd[0] - ((in_even[0] + s1 + 2) >> 2); */
@@ -807,9 +809,9 @@ static void opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2(
 
     if (!(len & 1)) {
         /*dn = in_odd[(len / 2 - 1) * stride] - ((s1 + 1) >> 1); */
-        dn_0 = SUB(LOADU(in_odd + (len / 2 - 1) * stride),
+        dn_0 = SUB(LOADU(in_odd + (OPJ_SIZE_T)(len / 2 - 1) * stride),
                    SAR(ADD3(s1_0, s1_0, two), 2));
-        dn_1 = SUB(LOADU(in_odd + (len / 2 - 1) * stride + VREG_INT_COUNT),
+        dn_1 = SUB(LOADU(in_odd + (OPJ_SIZE_T)(len / 2 - 1) * stride + VREG_INT_COUNT),
                    SAR(ADD3(s1_1, s1_1, two), 2));
 
         /* tmp[len - 2] = s1 + ((dn + dc) >> 1); */
@@ -849,7 +851,7 @@ static void opj_idwt3_v_cas0(OPJ_INT32* tmp,
                              const OPJ_INT32 sn,
                              const OPJ_INT32 len,
                              OPJ_INT32* tiledp_col,
-                             const OPJ_INT32 stride)
+                             const OPJ_SIZE_T stride)
 {
     OPJ_INT32 i, j;
     OPJ_INT32 d1c, d1n, s1n, s0c, s0n;
@@ -860,15 +862,15 @@ static void opj_idwt3_v_cas0(OPJ_INT32* tmp,
     /* accesses and explicit interleaving. */
 
     s1n = tiledp_col[0];
-    d1n = tiledp_col[sn * stride];
+    d1n = tiledp_col[(OPJ_SIZE_T)sn * stride];
     s0n = s1n - ((d1n + 1) >> 1);
 
     for (i = 0, j = 0; i < (len - 3); i += 2, j++) {
         d1c = d1n;
         s0c = s0n;
 
-        s1n = tiledp_col[(j + 1) * stride];
-        d1n = tiledp_col[(sn + j + 1) * stride];
+        s1n = tiledp_col[(OPJ_SIZE_T)(j + 1) * stride];
+        d1n = tiledp_col[(OPJ_SIZE_T)(sn + j + 1) * stride];
 
         s0n = s1n - ((d1c + d1n + 2) >> 2);
 
@@ -880,7 +882,7 @@ static void opj_idwt3_v_cas0(OPJ_INT32* tmp,
 
     if (len & 1) {
         tmp[len - 1] =
-            tiledp_col[((len - 1) / 2) * stride] -
+            tiledp_col[(OPJ_SIZE_T)((len - 1) / 2) * stride] -
             ((d1n + 1) >> 1);
         tmp[len - 2] = d1n + ((s0n + tmp[len - 1]) >> 1);
     } else {
@@ -888,7 +890,7 @@ static void opj_idwt3_v_cas0(OPJ_INT32* tmp,
     }
 
     for (i = 0; i < len; ++i) {
-        tiledp_col[i * stride] = tmp[i];
+        tiledp_col[(OPJ_SIZE_T)i * stride] = tmp[i];
     }
 }
 
@@ -899,11 +901,11 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp,
                              const OPJ_INT32 sn,
                              const OPJ_INT32 len,
                              OPJ_INT32* tiledp_col,
-                             const OPJ_INT32 stride)
+                             const OPJ_SIZE_T stride)
 {
     OPJ_INT32 i, j;
     OPJ_INT32 s1, s2, dc, dn;
-    const OPJ_INT32* in_even = &tiledp_col[sn * stride];
+    const OPJ_INT32* in_even = &tiledp_col[(OPJ_SIZE_T)sn * stride];
     const OPJ_INT32* in_odd = &tiledp_col[0];
 
     assert(len > 2);
@@ -916,9 +918,9 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp,
     tmp[0] = in_even[0] + dc;
     for (i = 1, j = 1; i < (len - 2 - !(len & 1)); i += 2, j++) {
 
-        s2 = in_even[(j + 1) * stride];
+        s2 = in_even[(OPJ_SIZE_T)(j + 1) * stride];
 
-        dn = in_odd[j * stride] - ((s1 + s2 + 2) >> 2);
+        dn = in_odd[(OPJ_SIZE_T)j * stride] - ((s1 + s2 + 2) >> 2);
         tmp[i  ] = dc;
         tmp[i + 1] = s1 + ((dn + dc) >> 1);
 
@@ -927,7 +929,7 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp,
     }
     tmp[i] = dc;
     if (!(len & 1)) {
-        dn = in_odd[(len / 2 - 1) * stride] - ((s1 + 1) >> 1);
+        dn = in_odd[(OPJ_SIZE_T)(len / 2 - 1) * stride] - ((s1 + 1) >> 1);
         tmp[len - 2] = s1 + ((dn + dc) >> 1);
         tmp[len - 1] = dn;
     } else {
@@ -935,7 +937,7 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp,
     }
 
     for (i = 0; i < len; ++i) {
-        tiledp_col[i * stride] = tmp[i];
+        tiledp_col[(OPJ_SIZE_T)i * stride] = tmp[i];
     }
 }
 #endif /* !defined(STANDARD_SLOW_VERSION) */
@@ -946,7 +948,7 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp,
 /* Performs interleave, inverse wavelet transform and copy back to buffer */
 static void opj_idwt53_v(const opj_dwt_t *dwt,
                          OPJ_INT32* tiledp_col,
-                         OPJ_INT32 stride,
+                         OPJ_SIZE_T stride,
                          OPJ_INT32 nb_cols)
 {
 #ifdef STANDARD_SLOW_VERSION
@@ -994,14 +996,14 @@ static void opj_idwt53_v(const opj_dwt_t *dwt,
             OPJ_INT32* out = dwt->mem;
             for (c = 0; c < nb_cols; c++, tiledp_col++) {
                 OPJ_INT32 i;
-                const OPJ_INT32* in_even = &tiledp_col[sn * stride];
+                const OPJ_INT32* in_even = &tiledp_col[(OPJ_SIZE_T)sn * stride];
                 const OPJ_INT32* in_odd = &tiledp_col[0];
 
                 out[1] = in_odd[0] - ((in_even[0] + 1) >> 1);
                 out[0] = in_even[0] + out[1];
 
                 for (i = 0; i < len; ++i) {
-                    tiledp_col[i * stride] = out[i];
+                    tiledp_col[(OPJ_SIZE_T)i * stride] = out[i];
                 }
             }
 
@@ -1111,7 +1113,7 @@ static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec,
 
     OPJ_INT32 rw;           /* width of the resolution level computed   */
     OPJ_INT32 rh;           /* height of the resolution level computed  */
-    size_t l_data_size;
+    OPJ_SIZE_T l_data_size;
 
     opj_tcd_resolution_t * l_cur_res = 0;
     opj_tcd_resolution_t * l_last_res = 0;
@@ -1194,50 +1196,16 @@ OPJ_BOOL opj_dwt_encode(opj_tcd_tilecomp_t * tilec)
     return opj_dwt_encode_procedure(tilec, opj_dwt_encode_1);
 }
 
-static OPJ_BOOL opj_dwt_is_whole_tile_decoding(opj_tcd_t *p_tcd,
-        opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres)
-{
-    opj_image_comp_t* image_comp = &(p_tcd->image->comps[tilec->compno]);
-    /* Compute the intersection of the area of interest, expressed in tile coordinates */
-    /* with the tile coordinates */
-    OPJ_UINT32 tcx0 = opj_uint_max(
-                          (OPJ_UINT32)tilec->x0,
-                          opj_uint_ceildiv(p_tcd->decoded_x0, image_comp->dx));
-    OPJ_UINT32 tcy0 = opj_uint_max(
-                          (OPJ_UINT32)tilec->y0,
-                          opj_uint_ceildiv(p_tcd->decoded_y0, image_comp->dy));
-    OPJ_UINT32 tcx1 = opj_uint_min(
-                          (OPJ_UINT32)tilec->x1,
-                          opj_uint_ceildiv(p_tcd->decoded_x1, image_comp->dx));
-    OPJ_UINT32 tcy1 = opj_uint_min(
-                          (OPJ_UINT32)tilec->y1,
-                          opj_uint_ceildiv(p_tcd->decoded_y1, image_comp->dy));
-
-    OPJ_UINT32 shift = tilec->numresolutions - numres;
-
-    /* Tolerate small margin within the reduced resolution factor to consider if */
-    /* the whole tile path must be taken */
-    return (tcx0 >= (OPJ_UINT32)tilec->x0 &&
-            tcy0 >= (OPJ_UINT32)tilec->y0 &&
-            tcx1 <= (OPJ_UINT32)tilec->x1 &&
-            tcy1 <= (OPJ_UINT32)tilec->y1 &&
-            (shift >= 32 ||
-             (((tcx0 - (OPJ_UINT32)tilec->x0) >> shift) == 0 &&
-              ((tcy0 - (OPJ_UINT32)tilec->y0) >> shift) == 0 &&
-              (((OPJ_UINT32)tilec->x1 - tcx1) >> shift) == 0 &&
-              (((OPJ_UINT32)tilec->y1 - tcy1) >> shift) == 0)));
-}
-
 /* <summary>                            */
 /* Inverse 5-3 wavelet transform in 2-D. */
 /* </summary>                           */
 OPJ_BOOL opj_dwt_decode(opj_tcd_t *p_tcd, opj_tcd_tilecomp_t* tilec,
                         OPJ_UINT32 numres)
 {
-    if (opj_dwt_is_whole_tile_decoding(p_tcd, tilec, numres)) {
+    if (p_tcd->whole_tile_decoding) {
         return opj_dwt_decode_tile(p_tcd->thread_pool, tilec, numres);
     } else {
-        return opj_dwt_decode_partial_tile(p_tcd, tilec, numres);
+        return opj_dwt_decode_partial_tile(tilec, numres);
     }
 }
 
@@ -1375,11 +1343,11 @@ static void opj_dwt_decode_v_func(void* user_data, opj_tls_t* tls)
     job = (opj_dwd_decode_v_job_t*)user_data;
     for (j = job->min_j; j + PARALLEL_COLS_53 <= job->max_j;
             j += PARALLEL_COLS_53) {
-        opj_idwt53_v(&job->v, &job->tiledp[j], (OPJ_INT32)job->w,
+        opj_idwt53_v(&job->v, &job->tiledp[j], (OPJ_SIZE_T)job->w,
                      PARALLEL_COLS_53);
     }
     if (j < job->max_j)
-        opj_idwt53_v(&job->v, &job->tiledp[j], (OPJ_INT32)job->w,
+        opj_idwt53_v(&job->v, &job->tiledp[j], (OPJ_SIZE_T)job->w,
                      (OPJ_INT32)(job->max_j - j));
 
     opj_aligned_free(job->v.mem);
@@ -1403,8 +1371,10 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp,
     OPJ_UINT32 rh = (OPJ_UINT32)(tr->y1 -
                                  tr->y0);  /* height of the resolution level computed */
 
-    OPJ_UINT32 w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
-    size_t h_mem_size;
+    OPJ_UINT32 w = (OPJ_UINT32)(tilec->resolutions[tilec->minimum_num_resolutions -
+                                                               1].x1 -
+                                tilec->resolutions[tilec->minimum_num_resolutions - 1].x0);
+    OPJ_SIZE_T h_mem_size;
     int num_threads;
 
     if (numres == 1U) {
@@ -1445,7 +1415,7 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp,
 
         if (num_threads <= 1 || rh <= 1) {
             for (j = 0; j < rh; ++j) {
-                opj_idwt53_h(&h, &tiledp[j * w]);
+                opj_idwt53_h(&h, &tiledp[(OPJ_SIZE_T)j * w]);
             }
         } else {
             OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads;
@@ -1497,10 +1467,10 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp,
         if (num_threads <= 1 || rw <= 1) {
             for (j = 0; j + PARALLEL_COLS_53 <= rw;
                     j += PARALLEL_COLS_53) {
-                opj_idwt53_v(&v, &tiledp[j], (OPJ_INT32)w, PARALLEL_COLS_53);
+                opj_idwt53_v(&v, &tiledp[j], (OPJ_SIZE_T)w, PARALLEL_COLS_53);
             }
             if (j < rw) {
-                opj_idwt53_v(&v, &tiledp[j], (OPJ_INT32)w, (OPJ_INT32)(rw - j));
+                opj_idwt53_v(&v, &tiledp[j], (OPJ_SIZE_T)w, (OPJ_INT32)(rw - j));
             }
         } else {
             OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads;
@@ -1552,51 +1522,56 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp,
 
 static void opj_dwt_interleave_partial_h(OPJ_INT32 *dest,
         OPJ_INT32 cas,
-        const OPJ_INT32* src,
-        OPJ_INT32 sn,
-        OPJ_INT32 win_l_x0,
-        OPJ_INT32 win_l_x1,
-        OPJ_INT32 win_h_x0,
-        OPJ_INT32 win_h_x1)
+        opj_sparse_array_int32_t* sa, /* sparse array the samples are read from */
+        OPJ_UINT32 sa_line,           /* line of sa to read (sa_line .. sa_line + 1) */
+        OPJ_UINT32 sn,                /* offset added to the second window inside the line */
+        OPJ_UINT32 win_l_x0,          /* first window: [win_l_x0, win_l_x1) */
+        OPJ_UINT32 win_l_x1,
+        OPJ_UINT32 win_h_x0,          /* second window: [sn + win_h_x0, sn + win_h_x1) */
+        OPJ_UINT32 win_h_x1)
 {
-    const OPJ_INT32 *ai = src;
-    OPJ_INT32 *bi = dest + cas;
-    OPJ_INT32  i;
-
-    for (i = win_l_x0; i < win_l_x1; i++) {
-        bi[2 * i] = ai[i];
-    }
-
-    ai  = src + sn;
-    bi  = dest + 1 - cas;
-    for (i = win_h_x0; i < win_h_x1; i++) {
-        bi[2 * i] = ai[i];
-    }
+    OPJ_BOOL ret; /* result of the sparse array reads, only checked through assert() */
+    ret = opj_sparse_array_int32_read(sa, /* first window goes to dest[cas + 2 * i] */
+                                      win_l_x0, sa_line,
+                                      win_l_x1, sa_line + 1,
+                                      dest + cas + 2 * win_l_x0,
+                                      2, 0, OPJ_TRUE); /* presumably dest column stride 2, line stride 0 - confirm in sparse_array.h */
+    assert(ret);
+    ret = opj_sparse_array_int32_read(sa, /* second window goes to dest[1 - cas + 2 * i] */
+                                      sn + win_h_x0, sa_line,
+                                      sn + win_h_x1, sa_line + 1,
+                                      dest + 1 - cas + 2 * win_h_x0,
+                                      2, 0, OPJ_TRUE);
+    assert(ret);
+    OPJ_UNUSED(ret); /* ret is otherwise unused when assert() is compiled out */
 }
 
+/* Reads two row windows of columns [sa_col, sa_col + nb_cols) from sa into dest, 4 ints per sample. */
 static void opj_dwt_interleave_partial_v(OPJ_INT32 *dest,
         OPJ_INT32 cas,
-        const OPJ_INT32* src,
-        OPJ_INT32 sn,
-        OPJ_INT32 stride,
-        OPJ_INT32 win_l_y0,
-        OPJ_INT32 win_l_y1,
-        OPJ_INT32 win_h_y0,
-        OPJ_INT32 win_h_y1)
+        opj_sparse_array_int32_t* sa, /* sparse array the samples are read from */
+        OPJ_UINT32 sa_col,            /* first column of sa to read */
+        OPJ_UINT32 nb_cols,           /* number of columns read, starting at sa_col */
+        OPJ_UINT32 sn,                /* offset added to the second window's rows */
+        OPJ_UINT32 win_l_y0,          /* first window: rows [win_l_y0, win_l_y1) */
+        OPJ_UINT32 win_l_y1,
+        OPJ_UINT32 win_h_y0,          /* second window: rows [sn + win_h_y0, sn + win_h_y1) */
+        OPJ_UINT32 win_h_y1)
 {
-    const OPJ_INT32 *ai = src;
-    OPJ_INT32 *bi = dest + cas;
-    OPJ_INT32  i;
-
-    for (i = win_l_y0; i < win_l_y1; i++) {
-        bi[2 * i] = ai[i * stride];
-    }
-
-    ai  = src + sn * stride;
-    bi  = dest + 1 - cas;
-    for (i = win_h_y0; i < win_h_y1; i++) {
-        bi[2 * i] = ai[i * stride];
-    }
+    OPJ_BOOL ret; /* result of the sparse array reads, only checked through assert() */
+    ret  = opj_sparse_array_int32_read(sa,
+                                       sa_col, win_l_y0,
+                                       sa_col + nb_cols, win_l_y1,
+                                       dest + cas * 4 + 2 * 4 * win_l_y0, /* the 4 is hard-coded, independent of nb_cols */
+                                       1, 2 * 4, OPJ_TRUE); /* presumably dest column stride 1, line stride 8 - confirm in sparse_array.h */
+    assert(ret);
+    ret = opj_sparse_array_int32_read(sa,
+                                      sa_col, sn + win_h_y0,
+                                      sa_col + nb_cols, sn + win_h_y1,
+                                      dest + (1 - cas) * 4 + 2 * 4 * win_h_y0, /* other group of 4 within each pair */
+                                      1, 2 * 4, OPJ_TRUE);
+    assert(ret);
+    OPJ_UNUSED(ret); /* ret is otherwise unused when assert() is compiled out */
 }
 
 static void opj_dwt_decode_partial_1(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn,
@@ -1610,12 +1585,55 @@ static void opj_dwt_decode_partial_1(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn,
 
     if (!cas) {
         if ((dn > 0) || (sn > 1)) { /* NEW :  CASE ONE ELEMENT */
-            for (i = win_l_x0; i < win_l_x1; i++) {
+
+            /* Naive version is :
+            for (i = win_l_x0; i < win_l_x1; i++) {
                 OPJ_S(i) -= (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2;
             }
             for (i = win_h_x0; i < win_h_x1; i++) {
                 OPJ_D(i) += (OPJ_S_(i) + OPJ_S_(i + 1)) >> 1;
             }
+            but the compiler doesn't manage to unroll it to avoid bound
+            checking in OPJ_S_ and OPJ_D_ macros
+            */
+
+            i = win_l_x0;
+            if (i < win_l_x1) {
+                OPJ_INT32 i_max;
+
+                /* Left-most case */
+                OPJ_S(i) -= (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2;
+                i ++;
+
+                i_max = win_l_x1;
+                if (i_max > dn) {
+                    i_max = dn;
+                }
+                for (; i < i_max; i++) {
+                    /* No bound checking */
+                    OPJ_S(i) -= (OPJ_D(i - 1) + OPJ_D(i) + 2) >> 2;
+                }
+                for (; i < win_l_x1; i++) {
+                    /* Right-most case */
+                    OPJ_S(i) -= (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2;
+                }
+            }
+
+            i = win_h_x0;
+            if (i < win_h_x1) {
+                OPJ_INT32 i_max = win_h_x1;
+                if (i_max >= sn) {
+                    i_max = sn - 1;
+                }
+                for (; i < i_max; i++) {
+                    /* No bound checking */
+                    OPJ_D(i) += (OPJ_S(i) + OPJ_S(i + 1)) >> 1;
+                }
+                for (; i < win_h_x1; i++) {
+                    /* Right-most case */
+                    OPJ_D(i) += (OPJ_S_(i) + OPJ_S_(i + 1)) >> 1;
+                }
+            }
         }
     } else {
         if (!sn  && dn == 1) {        /* NEW :  CASE ONE ELEMENT */
@@ -1631,6 +1649,150 @@ static void opj_dwt_decode_partial_1(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn,
     }
 }
 
+#define OPJ_S_off(i,off) a[(OPJ_UINT32)(i)*2*4+off]
+#define OPJ_D_off(i,off) a[(1+(OPJ_UINT32)(i)*2)*4+off]
+#define OPJ_S__off(i,off) ((i)<0?OPJ_S_off(0,off):((i)>=sn?OPJ_S_off(sn-1,off):OPJ_S_off(i,off)))
+#define OPJ_D__off(i,off) ((i)<0?OPJ_D_off(0,off):((i)>=dn?OPJ_D_off(dn-1,off):OPJ_D_off(i,off)))
+#define OPJ_SS__off(i,off) ((i)<0?OPJ_S_off(0,off):((i)>=dn?OPJ_S_off(dn-1,off):OPJ_S_off(i,off)))
+#define OPJ_DD__off(i,off) ((i)<0?OPJ_D_off(0,off):((i)>=sn?OPJ_D_off(sn-1,off):OPJ_D_off(i,off)))
+
+static void opj_dwt_decode_partial_1_parallel(OPJ_INT32 *a,
+        OPJ_UINT32 nb_cols,
+        OPJ_INT32 dn, OPJ_INT32 sn,
+        OPJ_INT32 cas,
+        OPJ_INT32 win_l_x0,
+        OPJ_INT32 win_l_x1,
+        OPJ_INT32 win_h_x0,
+        OPJ_INT32 win_h_x1)
+{
+    OPJ_INT32 i;
+    OPJ_UINT32 off;
+    /* Windowed lifting on 4 columns at once: a[] holds 4 ints per sample, see OPJ_S_off / OPJ_D_off */
+    (void)nb_cols; /* unused: every loop below processes exactly 4 columns (off = 0..3) */
+
+    if (!cas) {
+        if ((dn > 0) || (sn > 1)) { /* NEW :  CASE ONE ELEMENT */
+
+            /* Naive version, for each of the 4 interleaved columns, is :
+            for (i = win_l_x0; i < win_l_x1; i++) {
+                OPJ_S(i) -= (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2;
+            }
+            for (i = win_h_x0; i < win_h_x1; i++) {
+                OPJ_D(i) += (OPJ_S_(i) + OPJ_S_(i + 1)) >> 1;
+            }
+            but the compiler doesn't manage to unroll it to avoid bound
+            checking in the clamping macros (OPJ_S__off and OPJ_D__off here)
+            */
+
+            i = win_l_x0;
+            if (i < win_l_x1) {
+                OPJ_INT32 i_max;
+
+                /* Left-most case: i - 1 may be negative, so the clamping macro is needed */
+                for (off = 0; off < 4; off++) {
+                    OPJ_S_off(i, off) -= (OPJ_D__off(i - 1, off) + OPJ_D__off(i, off) + 2) >> 2;
+                }
+                i ++;
+
+                i_max = win_l_x1;
+                if (i_max > dn) {
+                    i_max = dn;
+                }
+
+#ifdef __SSE2__
+                if (i + 1 < i_max) {
+                    const __m128i two = _mm_set1_epi32(2);
+                    __m128i Dm1 = _mm_load_si128((__m128i * const)(a + 4 + (i - 1) * 8));
+                    for (; i + 1 < i_max; i += 2) {
+                        /* No bound checking. 2 samples x 4 columns per iteration. Aligned loads/stores: assumes a is 16-byte aligned - NOTE(review): confirm at the caller */
+                        __m128i S = _mm_load_si128((__m128i * const)(a + i * 8));
+                        __m128i D = _mm_load_si128((__m128i * const)(a + 4 + i * 8));
+                        __m128i S1 = _mm_load_si128((__m128i * const)(a + (i + 1) * 8));
+                        __m128i D1 = _mm_load_si128((__m128i * const)(a + 4 + (i + 1) * 8));
+                        S = _mm_sub_epi32(S,
+                                          _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(Dm1, D), two), 2));
+                        S1 = _mm_sub_epi32(S1,
+                                           _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(D, D1), two), 2));
+                        _mm_store_si128((__m128i*)(a + i * 8), S);
+                        _mm_store_si128((__m128i*)(a + (i + 1) * 8), S1);
+                        Dm1 = D1; /* D(i + 1) is D(i - 1) of the next iteration */
+                    }
+                }
+#endif
+
+                for (; i < i_max; i++) {
+                    /* No bound checking: 1 <= i < dn here, so i - 1 and i are valid D indices */
+                    for (off = 0; off < 4; off++) {
+                        OPJ_S_off(i, off) -= (OPJ_D_off(i - 1, off) + OPJ_D_off(i, off) + 2) >> 2;
+                    }
+                }
+                for (; i < win_l_x1; i++) {
+                    /* Right-most case: i >= dn, so the clamping macro is needed */
+                    for (off = 0; off < 4; off++) {
+                        OPJ_S_off(i, off) -= (OPJ_D__off(i - 1, off) + OPJ_D__off(i, off) + 2) >> 2;
+                    }
+                }
+            }
+
+            i = win_h_x0;
+            if (i < win_h_x1) {
+                OPJ_INT32 i_max = win_h_x1;
+                if (i_max >= sn) {
+                    i_max = sn - 1;
+                }
+
+#ifdef __SSE2__
+                if (i + 1 < i_max) {
+                    __m128i S =  _mm_load_si128((__m128i * const)(a + i * 8));
+                    for (; i + 1 < i_max; i += 2) {
+                        /* No bound checking. 2 samples x 4 columns per iteration, same alignment assumption as above */
+                        __m128i D = _mm_load_si128((__m128i * const)(a + 4 + i * 8));
+                        __m128i S1 = _mm_load_si128((__m128i * const)(a + (i + 1) * 8));
+                        __m128i D1 = _mm_load_si128((__m128i * const)(a + 4 + (i + 1) * 8));
+                        __m128i S2 = _mm_load_si128((__m128i * const)(a + (i + 2) * 8));
+                        D = _mm_add_epi32(D, _mm_srai_epi32(_mm_add_epi32(S, S1), 1));
+                        D1 = _mm_add_epi32(D1, _mm_srai_epi32(_mm_add_epi32(S1, S2), 1));
+                        _mm_store_si128((__m128i*)(a + 4 + i * 8), D);
+                        _mm_store_si128((__m128i*)(a + 4 + (i + 1) * 8), D1);
+                        S = S2; /* S(i + 2) is S(i) of the next iteration */
+                    }
+                }
+#endif
+
+                for (; i < i_max; i++) {
+                    /* No bound checking: i < sn - 1 here, so i and i + 1 are valid S indices */
+                    for (off = 0; off < 4; off++) {
+                        OPJ_D_off(i, off) += (OPJ_S_off(i, off) + OPJ_S_off(i + 1, off)) >> 1;
+                    }
+                }
+                for (; i < win_h_x1; i++) {
+                    /* Right-most case: i + 1 may reach sn, so the clamping macro is needed */
+                    for (off = 0; off < 4; off++) {
+                        OPJ_D_off(i, off) += (OPJ_S__off(i, off) + OPJ_S__off(i + 1, off)) >> 1;
+                    }
+                }
+            }
+        }
+    } else {
+        if (!sn  && dn == 1) {        /* NEW :  CASE ONE ELEMENT */
+            for (off = 0; off < 4; off++) {
+                OPJ_S_off(0, off) /= 2;
+            }
+        } else {
+            for (i = win_l_x0; i < win_l_x1; i++) {
+                for (off = 0; off < 4; off++) {
+                    OPJ_D_off(i, off) -= (OPJ_SS__off(i, off) + OPJ_SS__off(i + 1, off) + 2) >> 2;
+                }
+            }
+            for (i = win_h_x0; i < win_h_x1; i++) {
+                for (off = 0; off < 4; off++) {
+                    OPJ_S_off(i, off) += (OPJ_DD__off(i, off) + OPJ_DD__off(i - 1, off)) >> 1;
+                }
+            }
+        }
+    }
+}
+
 static void opj_dwt_get_band_coordinates(opj_tcd_tilecomp_t* tilec,
         OPJ_UINT32 resno,
         OPJ_UINT32 bandno,
@@ -1683,10 +1845,68 @@ static void opj_dwt_segment_grow(OPJ_UINT32 filter_width,
     *end = opj_uint_min(*end, max_size);
 }
 
-static OPJ_BOOL opj_dwt_decode_partial_tile(opj_tcd_t *tcd,
-        opj_tcd_tilecomp_t* tilec,
-        OPJ_UINT32 numres)
+/* Builds a sparse array covering resolution numres - 1 and copies every decoded code-block into it; returns NULL on failure. */
+static opj_sparse_array_int32_t* opj_dwt_init_sparse_array(
+    opj_tcd_tilecomp_t* tilec,
+    OPJ_UINT32 numres)
 {
+    opj_tcd_resolution_t* tr_max = &(tilec->resolutions[numres - 1]);
+    OPJ_UINT32 w = (OPJ_UINT32)(tr_max->x1 - tr_max->x0);
+    OPJ_UINT32 h = (OPJ_UINT32)(tr_max->y1 - tr_max->y0);
+    OPJ_UINT32 resno, bandno, precno, cblkno;
+    opj_sparse_array_int32_t* sa = opj_sparse_array_int32_create(
+                                       w, h, opj_uint_min(w, 64), opj_uint_min(h, 64));
+    if (sa == NULL) {
+        return NULL;
+    }
+
+    for (resno = 0; resno < numres; ++resno) {
+        opj_tcd_resolution_t* res = &tilec->resolutions[resno];
+
+        for (bandno = 0; bandno < res->numbands; ++bandno) {
+            opj_tcd_band_t* band = &res->bands[bandno];
+
+            for (precno = 0; precno < res->pw * res->ph; ++precno) {
+                opj_tcd_precinct_t* precinct = &band->precincts[precno];
+                for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
+                    opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
+                    if (cblk->decoded_data != NULL) { /* code-blocks without decoded data are skipped */
+                        OPJ_UINT32 x = (OPJ_UINT32)(cblk->x0 - band->x0);
+                        OPJ_UINT32 y = (OPJ_UINT32)(cblk->y0 - band->y0);
+                        OPJ_UINT32 cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
+                        OPJ_UINT32 cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);
+
+                        if (band->bandno & 1) { /* x offset: width of resolution resno - 1 */
+                            opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
+                            x += (OPJ_UINT32)(pres->x1 - pres->x0);
+                        }
+                        if (band->bandno & 2) { /* y offset: height of resolution resno - 1 */
+                            opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
+                            y += (OPJ_UINT32)(pres->y1 - pres->y0);
+                        }
+
+                        if (!opj_sparse_array_int32_write(sa, x, y,
+                                                          x + cblk_w, y + cblk_h,
+                                                          cblk->decoded_data,
+                                                          1, cblk_w, OPJ_TRUE)) {
+                            opj_sparse_array_int32_free(sa);
+                            return NULL;
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    return sa;
+}
+
+
+static OPJ_BOOL opj_dwt_decode_partial_tile(
+    opj_tcd_tilecomp_t* tilec,
+    OPJ_UINT32 numres)
+{
+    opj_sparse_array_int32_t* sa;
     opj_dwt_t h;
     opj_dwt_t v;
     OPJ_UINT32 resno;
@@ -1695,52 +1915,63 @@ static OPJ_BOOL opj_dwt_decode_partial_tile(opj_tcd_t *tcd,
     const OPJ_UINT32 filter_width = 2U;
 
     opj_tcd_resolution_t* tr = tilec->resolutions;
+    opj_tcd_resolution_t* tr_max = &(tilec->resolutions[numres - 1]);
 
     OPJ_UINT32 rw = (OPJ_UINT32)(tr->x1 -
                                  tr->x0);  /* width of the resolution level computed */
     OPJ_UINT32 rh = (OPJ_UINT32)(tr->y1 -
                                  tr->y0);  /* height of the resolution level computed */
 
-    OPJ_UINT32 w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
-    size_t h_mem_size;
+    OPJ_SIZE_T h_mem_size;
 
-    opj_image_comp_t* image_comp = &(tcd->image->comps[tilec->compno]);
     /* Compute the intersection of the area of interest, expressed in tile coordinates */
     /* with the tile coordinates */
-    OPJ_UINT32 win_tcx0 = opj_uint_max(
-                              (OPJ_UINT32)tilec->x0,
-                              opj_uint_ceildiv(tcd->decoded_x0, image_comp->dx));
-    OPJ_UINT32 win_tcy0 = opj_uint_max(
-                              (OPJ_UINT32)tilec->y0,
-                              opj_uint_ceildiv(tcd->decoded_y0, image_comp->dy));
-    OPJ_UINT32 win_tcx1 = opj_uint_min(
-                              (OPJ_UINT32)tilec->x1,
-                              opj_uint_ceildiv(tcd->decoded_x1, image_comp->dx));
-    OPJ_UINT32 win_tcy1 = opj_uint_min(
-                              (OPJ_UINT32)tilec->y1,
-                              opj_uint_ceildiv(tcd->decoded_y1, image_comp->dy));
+    OPJ_UINT32 win_tcx0 = tilec->win_x0;
+    OPJ_UINT32 win_tcy0 = tilec->win_y0;
+    OPJ_UINT32 win_tcx1 = tilec->win_x1;
+    OPJ_UINT32 win_tcy1 = tilec->win_y1;
+
+    sa = opj_dwt_init_sparse_array(tilec, numres);
+    if (sa == NULL) {
+        /* FIXME event manager error callback */
+        return OPJ_FALSE;
+    }
 
     if (numres == 1U) {
+        /* Single resolution: no inverse DWT to run, only copy the window out of sa */
+        OPJ_BOOL ret = opj_sparse_array_int32_read(sa,
+                       tr_max->win_x0 - (OPJ_UINT32)tr_max->x0,
+                       tr_max->win_y0 - (OPJ_UINT32)tr_max->y0,
+                       tr_max->win_x1 - (OPJ_UINT32)tr_max->x0,
+                       tr_max->win_y1 - (OPJ_UINT32)tr_max->y0,
+                       tilec->data_win,
+                       1, tr_max->win_x1 - tr_max->win_x0,
+                       OPJ_TRUE);
+        assert(ret);
+        OPJ_UNUSED(ret);
+        opj_sparse_array_int32_free(sa);
         return OPJ_TRUE;
     }
     h_mem_size = opj_dwt_max_resolution(tr, numres);
     /* overflow check */
-    if (h_mem_size > (SIZE_MAX / sizeof(OPJ_INT32))) {
+    /* in vertical pass, we process 4 columns at a time */
+    if (h_mem_size > (SIZE_MAX / (4 * sizeof(OPJ_INT32)))) {
         /* FIXME event manager error callback */
+        opj_sparse_array_int32_free(sa);
         return OPJ_FALSE;
     }
 
-    h_mem_size *= sizeof(OPJ_INT32);
+    h_mem_size *= 4 * sizeof(OPJ_INT32);
     h.mem = (OPJ_INT32*)opj_aligned_32_malloc(h_mem_size);
     if (! h.mem) {
         /* FIXME event manager error callback */
+        opj_sparse_array_int32_free(sa);
         return OPJ_FALSE;
     }
 
     v.mem = h.mem;
 
-    for (resno = 1; --numres > 0; resno ++) {
-        OPJ_INT32 * OPJ_RESTRICT tiledp = tilec->data;
+    for (resno = 1; resno < numres; resno ++) {
         OPJ_UINT32 i, j;
         /* Window of interest subband-based coordinates */
         OPJ_UINT32 win_ll_x0, win_ll_y0, win_ll_x1, win_ll_y1;
@@ -1826,47 +2052,92 @@ static OPJ_BOOL opj_dwt_decode_partial_tile(opj_tcd_t *tcd,
         for (j = 0; j < rh; ++j) {
             if ((j >= win_ll_y0 && j < win_ll_y1) ||
                     (j >= win_lh_y0 + (OPJ_UINT32)v.sn && j < win_lh_y1 + (OPJ_UINT32)v.sn)) {
-                memset(h.mem, 0, (OPJ_UINT32)(h.sn + h.dn) * sizeof(OPJ_INT32));
+
+                /* Avoids dwt.c:1584:44 (in opj_dwt_decode_partial_1): runtime error: */
+                /* signed integer overflow: -1094795586 + -1094795586 cannot be represented in type 'int' */
+                /* on opj_decompress -i  ../../openjpeg/MAPA.jp2 -o out.tif -d 0,0,256,256 */
+                /* This is less extreme than memsetting the whole buffer to 0 */
+                /* although we could potentially do better with better handling of edge conditions */
+                if (win_tr_x1 >= 1 && win_tr_x1 < rw) {
+                    h.mem[win_tr_x1 - 1] = 0;
+                }
+                if (win_tr_x1 < rw) {
+                    h.mem[win_tr_x1] = 0;
+                }
+
                 opj_dwt_interleave_partial_h(h.mem,
                                              h.cas,
-                                             &tiledp[j * w],
-                                             h.sn,
-                                             (OPJ_INT32)win_ll_x0,
-                                             (OPJ_INT32)win_ll_x1,
-                                             (OPJ_INT32)win_hl_x0,
-                                             (OPJ_INT32)win_hl_x1);
+                                             sa,
+                                             j,
+                                             (OPJ_UINT32)h.sn,
+                                             win_ll_x0,
+                                             win_ll_x1,
+                                             win_hl_x0,
+                                             win_hl_x1);
                 opj_dwt_decode_partial_1(h.mem, h.dn, h.sn, h.cas,
                                          (OPJ_INT32)win_ll_x0,
                                          (OPJ_INT32)win_ll_x1,
                                          (OPJ_INT32)win_hl_x0,
                                          (OPJ_INT32)win_hl_x1);
-                memcpy(&tiledp[j * w] + win_tr_x0, h.mem + win_tr_x0,
-                       (win_tr_x1 - win_tr_x0) * sizeof(OPJ_INT32));
+                if (!opj_sparse_array_int32_write(sa,
+                                                  win_tr_x0, j,
+                                                  win_tr_x1, j + 1,
+                                                  h.mem + win_tr_x0,
+                                                  1, 0, OPJ_TRUE)) {
+                    /* FIXME event manager error callback */
+                    opj_sparse_array_int32_free(sa);
+                    opj_aligned_free(h.mem);
+                    return OPJ_FALSE;
+                }
             }
         }
 
-        for (i = win_tr_x0; i < win_tr_x1; ++i) {
-            memset(v.mem, 0, (OPJ_UINT32)(v.sn + v.dn) * sizeof(OPJ_INT32));
+        for (i = win_tr_x0; i < win_tr_x1;) {
+            OPJ_UINT32 nb_cols = opj_uint_min(4U, win_tr_x1 - i);
             opj_dwt_interleave_partial_v(v.mem,
                                          v.cas,
-                                         tiledp + i,
-                                         v.sn,
-                                         (OPJ_INT32)w,
-                                         (OPJ_INT32)win_ll_y0,
-                                         (OPJ_INT32)win_ll_y1,
-                                         (OPJ_INT32)win_lh_y0,
-                                         (OPJ_INT32)win_lh_y1);
-            opj_dwt_decode_partial_1(v.mem, v.dn, v.sn, v.cas,
-                                     (OPJ_INT32)win_ll_y0,
-                                     (OPJ_INT32)win_ll_y1,
-                                     (OPJ_INT32)win_lh_y0,
-                                     (OPJ_INT32)win_lh_y1);
-            for (j = win_tr_y0; j < win_tr_y1; j++) {
-                tiledp[j * w + i] = v.mem[j];
+                                         sa,
+                                         i,
+                                         nb_cols,
+                                         (OPJ_UINT32)v.sn,
+                                         win_ll_y0,
+                                         win_ll_y1,
+                                         win_lh_y0,
+                                         win_lh_y1);
+            opj_dwt_decode_partial_1_parallel(v.mem, nb_cols, v.dn, v.sn, v.cas,
+                                              (OPJ_INT32)win_ll_y0,
+                                              (OPJ_INT32)win_ll_y1,
+                                              (OPJ_INT32)win_lh_y0,
+                                              (OPJ_INT32)win_lh_y1);
+            if (!opj_sparse_array_int32_write(sa,
+                                              i, win_tr_y0,
+                                              i + nb_cols, win_tr_y1,
+                                              v.mem + 4 * win_tr_y0,
+                                              1, 4, OPJ_TRUE)) {
+                /* FIXME event manager error callback */
+                opj_sparse_array_int32_free(sa);
+                opj_aligned_free(h.mem);
+                return OPJ_FALSE;
             }
+
+            i += nb_cols;
         }
     }
     opj_aligned_free(h.mem);
+
+    {
+        OPJ_BOOL ret = opj_sparse_array_int32_read(sa,
+                       tr_max->win_x0 - (OPJ_UINT32)tr_max->x0,
+                       tr_max->win_y0 - (OPJ_UINT32)tr_max->y0,
+                       tr_max->win_x1 - (OPJ_UINT32)tr_max->x0,
+                       tr_max->win_y1 - (OPJ_UINT32)tr_max->y0,
+                       tilec->data_win,
+                       1, tr_max->win_x1 - tr_max->win_x0,
+                       OPJ_TRUE);
+        assert(ret);
+        OPJ_UNUSED(ret);
+    }
+    opj_sparse_array_int32_free(sa);
     return OPJ_TRUE;
 }
 
@@ -1881,8 +2152,8 @@ static void opj_v4dwt_interleave_h(opj_v4dwt_t* OPJ_RESTRICT dwt,
     OPJ_UINT32 x1 = dwt->win_l_x1;
 
     for (k = 0; k < 2; ++k) {
-        if (remaining_height >= 4 && ((size_t) a & 0x0f) == 0 &&
-                ((size_t) bi & 0x0f) == 0 && (width & 0x0f) == 0) {
+        if (remaining_height >= 4 && ((OPJ_SIZE_T) a & 0x0f) == 0 &&
+                ((OPJ_SIZE_T) bi & 0x0f) == 0 && (width & 0x0f) == 0) {
             /* Fast code path */
             for (i = x0; i < x1; ++i) {
                 OPJ_UINT32 j = i;
@@ -1924,6 +2195,32 @@ static void opj_v4dwt_interleave_h(opj_v4dwt_t* OPJ_RESTRICT dwt,
     }
 }
 
+static void opj_v4dwt_interleave_partial_h(opj_v4dwt_t* dwt,
+        opj_sparse_array_int32_t* sa,
+        OPJ_UINT32 sa_line,
+        OPJ_UINT32 remaining_height)
+{
+    /* Line sa_line + k of sa is read into lane k of the interleaved samples */
+    OPJ_UINT32 lane = 0;
+    while (lane < remaining_height) {
+        const OPJ_UINT32 y = sa_line + lane;
+        /* Destinations are float storage accessed through int32 pointers */
+        OPJ_INT32* lo = (OPJ_INT32*)(dwt->wavelet + dwt->cas + 2 * dwt->win_l_x0);
+        OPJ_INT32* hi = (OPJ_INT32*)(dwt->wavelet + 1 - dwt->cas + 2 * dwt->win_h_x0);
+        OPJ_BOOL ok = opj_sparse_array_int32_read(sa, dwt->win_l_x0, y,
+                      dwt->win_l_x1, y + 1, lo + lane, 8, 0, OPJ_TRUE);
+        assert(ok);
+        /* Second read: columns shifted by sn, written to the other parity */
+        ok = opj_sparse_array_int32_read(sa,
+                                         (OPJ_UINT32)dwt->sn + dwt->win_h_x0, y,
+                                         (OPJ_UINT32)dwt->sn + dwt->win_h_x1, y + 1,
+                                         hi + lane, 8, 0, OPJ_TRUE);
+        assert(ok);
+        OPJ_UNUSED(ok);
+        lane++;
+    }
+}
+
 static void opj_v4dwt_interleave_v(opj_v4dwt_t* OPJ_RESTRICT dwt,
                                    OPJ_FLOAT32* OPJ_RESTRICT a,
                                    OPJ_UINT32 width,
@@ -1933,17 +2230,40 @@ static void opj_v4dwt_interleave_v(opj_v4dwt_t* OPJ_RESTRICT dwt,
     OPJ_UINT32 i;
 
     for (i = dwt->win_l_x0; i < dwt->win_l_x1; ++i) {
-        memcpy(&bi[i * 2], &a[i * width], (size_t)nb_elts_read * sizeof(OPJ_FLOAT32));
+        memcpy(&bi[i * 2], &a[i * (OPJ_SIZE_T)width],
+               (OPJ_SIZE_T)nb_elts_read * sizeof(OPJ_FLOAT32));
     }
 
-    a += (OPJ_UINT32)dwt->sn * width;
+    a += (OPJ_UINT32)dwt->sn * (OPJ_SIZE_T)width;
     bi = dwt->wavelet + 1 - dwt->cas;
 
     for (i = dwt->win_h_x0; i < dwt->win_h_x1; ++i) {
-        memcpy(&bi[i * 2], &a[i * width], (size_t)nb_elts_read * sizeof(OPJ_FLOAT32));
+        memcpy(&bi[i * 2], &a[i * (OPJ_SIZE_T)width],
+               (OPJ_SIZE_T)nb_elts_read * sizeof(OPJ_FLOAT32));
     }
 }
 
+static void opj_v4dwt_interleave_partial_v(opj_v4dwt_t* OPJ_RESTRICT dwt,
+        opj_sparse_array_int32_t* sa,
+        OPJ_UINT32 sa_col,
+        OPJ_UINT32 nb_elts_read)
+{
+    /* Reads nb_elts_read columns of sa, starting at sa_col, into dwt->wavelet */
+    const OPJ_UINT32 x_end = sa_col + nb_elts_read;
+    const OPJ_UINT32 hi_y0 = (OPJ_UINT32)dwt->sn + dwt->win_h_x0;
+    const OPJ_UINT32 hi_y1 = (OPJ_UINT32)dwt->sn + dwt->win_h_x1;
+    /* Destinations are float storage accessed through int32 pointers */
+    OPJ_INT32* lo = (OPJ_INT32*)(dwt->wavelet + dwt->cas + 2 * dwt->win_l_x0);
+    OPJ_INT32* hi = (OPJ_INT32*)(dwt->wavelet + 1 - dwt->cas + 2 * dwt->win_h_x0);
+    OPJ_BOOL ok = opj_sparse_array_int32_read(sa, sa_col, dwt->win_l_x0,
+                  x_end, dwt->win_l_x1, lo, 1, 8, OPJ_TRUE);
+    assert(ok);
+    ok = opj_sparse_array_int32_read(sa, sa_col, hi_y0, x_end, hi_y1,
+                                     hi, 1, 8, OPJ_TRUE);
+    assert(ok);
+    OPJ_UNUSED(ok);
+}
+
 #ifdef __SSE__
 
 static void opj_v4dwt_decode_step1_sse(opj_v4_t* w,
@@ -1954,14 +2274,19 @@ static void opj_v4dwt_decode_step1_sse(opj_v4_t* w,
     __m128* OPJ_RESTRICT vw = (__m128*) w;
     OPJ_UINT32 i;
     /* 4x unrolled loop */
-    for (i = start; i + 3 < end; i += 4) {
-        vw[2 * i] = _mm_mul_ps(vw[2 * i], c);
-        vw[2 * i + 2] = _mm_mul_ps(vw[2 * i + 2], c);
-        vw[2 * i + 4] = _mm_mul_ps(vw[2 * i + 4], c);
-        vw[2 * i + 6] = _mm_mul_ps(vw[2 * i + 6], c);
-    }
-    for (; i < end; ++i) {
-        vw[2 * i] = _mm_mul_ps(vw[2 * i], c);
+    vw += 2 * start;
+    for (i = start; i + 3 < end; i += 4, vw += 8) {
+        __m128 xmm0 = _mm_mul_ps(vw[0], c);
+        __m128 xmm2 = _mm_mul_ps(vw[2], c);
+        __m128 xmm4 = _mm_mul_ps(vw[4], c);
+        __m128 xmm6 = _mm_mul_ps(vw[6], c);
+        vw[0] = xmm0;
+        vw[2] = xmm2;
+        vw[4] = xmm4;
+        vw[6] = xmm6;
+    }
+    for (; i < end; ++i, vw += 2) {
+        vw[0] = _mm_mul_ps(vw[0], c);
     }
 }
 
@@ -1982,7 +2307,29 @@ static void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w,
         vw += start * 2;
         tmp1 = vw[-3];
     }
-    for (i = start; i < imax; ++i) {
+
+    i = start;
+
+    /* 4x loop unrolling */
+    for (; i + 3 < imax; i += 4) {
+        __m128 tmp4, tmp5, tmp6, tmp7, tmp8, tmp9;
+        tmp2 = vw[-1];
+        tmp3 = vw[ 0];
+        tmp4 = vw[ 1];
+        tmp5 = vw[ 2];
+        tmp6 = vw[ 3];
+        tmp7 = vw[ 4];
+        tmp8 = vw[ 5];
+        tmp9 = vw[ 6];
+        vw[-1] = _mm_add_ps(tmp2, _mm_mul_ps(_mm_add_ps(tmp1, tmp3), c));
+        vw[ 1] = _mm_add_ps(tmp4, _mm_mul_ps(_mm_add_ps(tmp3, tmp5), c));
+        vw[ 3] = _mm_add_ps(tmp6, _mm_mul_ps(_mm_add_ps(tmp5, tmp7), c));
+        vw[ 5] = _mm_add_ps(tmp8, _mm_mul_ps(_mm_add_ps(tmp7, tmp9), c));
+        tmp1 = tmp9;
+        vw += 8;
+    }
+
+    for (; i < imax; ++i) {
         tmp2 = vw[-1];
         tmp3 = vw[ 0];
         vw[-1] = _mm_add_ps(tmp2, _mm_mul_ps(_mm_add_ps(tmp1, tmp3), c));
@@ -2146,9 +2493,11 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
     OPJ_UINT32 rh = (OPJ_UINT32)(res->y1 -
                                  res->y0);    /* height of the resolution level computed */
 
-    OPJ_UINT32 w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
+    OPJ_UINT32 w = (OPJ_UINT32)(tilec->resolutions[tilec->minimum_num_resolutions -
+                                                               1].x1 -
+                                tilec->resolutions[tilec->minimum_num_resolutions - 1].x0);
 
-    size_t l_data_size;
+    OPJ_SIZE_T l_data_size;
 
     l_data_size = opj_dwt_max_resolution(res, numres);
     /* overflow check */
@@ -2197,9 +2546,9 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
 
             for (k = 0; k < rw; k++) {
                 aj[k      ] = h.wavelet[k].f[0];
-                aj[k + w  ] = h.wavelet[k].f[1];
-                aj[k + w * 2] = h.wavelet[k].f[2];
-                aj[k + w * 3] = h.wavelet[k].f[3];
+                aj[k + (OPJ_SIZE_T)w  ] = h.wavelet[k].f[1];
+                aj[k + (OPJ_SIZE_T)w * 2] = h.wavelet[k].f[2];
+                aj[k + (OPJ_SIZE_T)w * 3] = h.wavelet[k].f[3];
             }
 
             aj += w * 4;
@@ -2212,10 +2561,10 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
             for (k = 0; k < rw; k++) {
                 switch (rh - j) {
                 case 3:
-                    aj[k + w * 2] = h.wavelet[k].f[2];
+                    aj[k + (OPJ_SIZE_T)w * 2] = h.wavelet[k].f[2];
                 /* FALLTHRU */
                 case 2:
-                    aj[k + w  ] = h.wavelet[k].f[1];
+                    aj[k + (OPJ_SIZE_T)w  ] = h.wavelet[k].f[1];
                 /* FALLTHRU */
                 case 1:
                     aj[k] = h.wavelet[k].f[0];
@@ -2238,7 +2587,7 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
             opj_v4dwt_decode(&v);
 
             for (k = 0; k < rh; ++k) {
-                memcpy(&aj[k * w], &v.wavelet[k], 4 * sizeof(OPJ_FLOAT32));
+                memcpy(&aj[k * (OPJ_SIZE_T)w], &v.wavelet[k], 4 * sizeof(OPJ_FLOAT32));
             }
             aj += 4;
         }
@@ -2252,7 +2601,8 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
             opj_v4dwt_decode(&v);
 
             for (k = 0; k < rh; ++k) {
-                memcpy(&aj[k * w], &v.wavelet[k], (size_t)j * sizeof(OPJ_FLOAT32));
+                memcpy(&aj[k * (OPJ_SIZE_T)w], &v.wavelet[k],
+                       (OPJ_SIZE_T)j * sizeof(OPJ_FLOAT32));
             }
         }
     }
@@ -2262,10 +2612,10 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
 }
 
 static
-OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_t *tcd,
-                                   opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
+OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
                                    OPJ_UINT32 numres)
 {
+    opj_sparse_array_int32_t* sa;
     opj_v4dwt_t h;
     opj_v4dwt_t v;
     OPJ_UINT32 resno;
@@ -2275,31 +2625,43 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_t *tcd,
     const OPJ_UINT32 filter_width = 4U;
 
     opj_tcd_resolution_t* tr = tilec->resolutions;
+    opj_tcd_resolution_t* tr_max = &(tilec->resolutions[numres - 1]);
 
     OPJ_UINT32 rw = (OPJ_UINT32)(tr->x1 -
                                  tr->x0);    /* width of the resolution level computed */
     OPJ_UINT32 rh = (OPJ_UINT32)(tr->y1 -
                                  tr->y0);    /* height of the resolution level computed */
 
-    OPJ_UINT32 w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
+    OPJ_SIZE_T l_data_size;
 
-    size_t l_data_size;
-
-    opj_image_comp_t* image_comp = &(tcd->image->comps[tilec->compno]);
     /* Compute the intersection of the area of interest, expressed in tile coordinates */
     /* with the tile coordinates */
-    OPJ_UINT32 win_tcx0 = opj_uint_max(
-                              (OPJ_UINT32)tilec->x0,
-                              opj_uint_ceildiv(tcd->decoded_x0, image_comp->dx));
-    OPJ_UINT32 win_tcy0 = opj_uint_max(
-                              (OPJ_UINT32)tilec->y0,
-                              opj_uint_ceildiv(tcd->decoded_y0, image_comp->dy));
-    OPJ_UINT32 win_tcx1 = opj_uint_min(
-                              (OPJ_UINT32)tilec->x1,
-                              opj_uint_ceildiv(tcd->decoded_x1, image_comp->dx));
-    OPJ_UINT32 win_tcy1 = opj_uint_min(
-                              (OPJ_UINT32)tilec->y1,
-                              opj_uint_ceildiv(tcd->decoded_y1, image_comp->dy));
+    OPJ_UINT32 win_tcx0 = tilec->win_x0;
+    OPJ_UINT32 win_tcy0 = tilec->win_y0;
+    OPJ_UINT32 win_tcx1 = tilec->win_x1;
+    OPJ_UINT32 win_tcy1 = tilec->win_y1;
+
+    sa = opj_dwt_init_sparse_array(tilec, numres);
+    if (sa == NULL) {
+        /* FIXME event manager error callback */
+        return OPJ_FALSE;
+    }
+
+    if (numres == 1U) {
+        /* Single resolution: no inverse DWT to run, only copy the window out of sa */
+        OPJ_BOOL ret = opj_sparse_array_int32_read(sa,
+                       tr_max->win_x0 - (OPJ_UINT32)tr_max->x0,
+                       tr_max->win_y0 - (OPJ_UINT32)tr_max->y0,
+                       tr_max->win_x1 - (OPJ_UINT32)tr_max->x0,
+                       tr_max->win_y1 - (OPJ_UINT32)tr_max->y0,
+                       tilec->data_win,
+                       1, tr_max->win_x1 - tr_max->win_x0,
+                       OPJ_TRUE);
+        assert(ret);
+        OPJ_UNUSED(ret);
+        opj_sparse_array_int32_free(sa);
+        return OPJ_TRUE;
+    }
 
     l_data_size = opj_dwt_max_resolution(tr, numres);
     /* overflow check */
@@ -2320,8 +2677,7 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_t *tcd,
     }
     v.wavelet = h.wavelet;
 
-    for (resno = 1; --numres; resno++) {
-        OPJ_FLOAT32 * OPJ_RESTRICT aj = (OPJ_FLOAT32*) tilec->data;
+    for (resno = 1; resno < numres; resno ++) {
         OPJ_UINT32 j;
         /* Window of interest subband-based coordinates */
         OPJ_UINT32 win_ll_x0, win_ll_y0, win_ll_x1, win_ll_y1;
@@ -2408,19 +2764,21 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_t *tcd,
         h.win_l_x1 = win_ll_x1;
         h.win_h_x0 = win_hl_x0;
         h.win_h_x1 = win_hl_x1;
-        for (j = 0; j + 3 < rh; j += 4, aj += w * 4) {
+        for (j = 0; j + 3 < rh; j += 4) {
             if ((j + 3 >= win_ll_y0 && j < win_ll_y1) ||
                     (j + 3 >= win_lh_y0 + (OPJ_UINT32)v.sn &&
                      j < win_lh_y1 + (OPJ_UINT32)v.sn)) {
-                OPJ_UINT32 k;
-                opj_v4dwt_interleave_h(&h, aj, w, rh - j);
+                opj_v4dwt_interleave_partial_h(&h, sa, j, opj_uint_min(4U, rh - j));
                 opj_v4dwt_decode(&h);
-
-                for (k = win_tr_x0; k < win_tr_x1; k++) {
-                    aj[k        ] = h.wavelet[k].f[0];
-                    aj[k + w    ] = h.wavelet[k].f[1];
-                    aj[k + w * 2] = h.wavelet[k].f[2];
-                    aj[k + w * 3] = h.wavelet[k].f[3];
+                if (!opj_sparse_array_int32_write(sa,
+                                                  win_tr_x0, j,
+                                                  win_tr_x1, j + 4,
+                                                  (OPJ_INT32*)&h.wavelet[win_tr_x0].f[0],
+                                                  4, 1, OPJ_TRUE)) {
+                    /* FIXME event manager error callback */
+                    opj_sparse_array_int32_free(sa);
+                    opj_aligned_free(h.wavelet);
+                    return OPJ_FALSE;
                 }
             }
         }
@@ -2429,20 +2787,17 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_t *tcd,
                 ((j + 3 >= win_ll_y0 && j < win_ll_y1) ||
                  (j + 3 >= win_lh_y0 + (OPJ_UINT32)v.sn &&
                   j < win_lh_y1 + (OPJ_UINT32)v.sn))) {
-            OPJ_UINT32 k;
-            opj_v4dwt_interleave_h(&h, aj, w, rh - j);
+            opj_v4dwt_interleave_partial_h(&h, sa, j, rh - j);
             opj_v4dwt_decode(&h);
-            for (k = win_tr_x0; k < win_tr_x1; k++) {
-                switch (rh - j) {
-                case 3:
-                    aj[k + w * 2] = h.wavelet[k].f[2];
-                /* FALLTHRU */
-                case 2:
-                    aj[k + w    ] = h.wavelet[k].f[1];
-                /* FALLTHRU */
-                case 1:
-                    aj[k        ] = h.wavelet[k].f[0];
-                }
+            if (!opj_sparse_array_int32_write(sa,
+                                              win_tr_x0, j,
+                                              win_tr_x1, rh,
+                                              (OPJ_INT32*)&h.wavelet[win_tr_x0].f[0],
+                                              4, 1, OPJ_TRUE)) {
+                /* FIXME event manager error callback */
+                opj_sparse_array_int32_free(sa);
+                opj_aligned_free(h.wavelet);
+                return OPJ_FALSE;
             }
         }
 
@@ -2450,21 +2805,39 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_t *tcd,
         v.win_l_x1 = win_ll_y1;
         v.win_h_x0 = win_lh_y0;
         v.win_h_x1 = win_lh_y1;
-        aj = (OPJ_FLOAT32*) tilec->data;
-        aj += win_tr_x0;
-        for (j = win_tr_x0; j < win_tr_x1; j += 4, aj += 4) {
+        for (j = win_tr_x0; j < win_tr_x1; j += 4) {
             OPJ_UINT32 nb_elts = opj_uint_min(4U, win_tr_x1 - j);
-            OPJ_UINT32 k;
 
-            opj_v4dwt_interleave_v(&v, aj, w, nb_elts);
+            opj_v4dwt_interleave_partial_v(&v, sa, j, nb_elts);
             opj_v4dwt_decode(&v);
 
-            for (k = win_tr_y0; k < win_tr_y1; ++k) {
-                memcpy(&aj[k * w], &v.wavelet[k], nb_elts * sizeof(OPJ_FLOAT32));
+            if (!opj_sparse_array_int32_write(sa,
+                                              j, win_tr_y0,
+                                              j + nb_elts, win_tr_y1,
+                                              (OPJ_INT32*)&h.wavelet[win_tr_y0].f[0],
+                                              1, 4, OPJ_TRUE)) {
+                /* FIXME event manager error callback */
+                opj_sparse_array_int32_free(sa);
+                opj_aligned_free(h.wavelet);
+                return OPJ_FALSE;
             }
         }
     }
 
+    {
+        OPJ_BOOL ret = opj_sparse_array_int32_read(sa,
+                       tr_max->win_x0 - (OPJ_UINT32)tr_max->x0,
+                       tr_max->win_y0 - (OPJ_UINT32)tr_max->y0,
+                       tr_max->win_x1 - (OPJ_UINT32)tr_max->x0,
+                       tr_max->win_y1 - (OPJ_UINT32)tr_max->y0,
+                       tilec->data_win,
+                       1, tr_max->win_x1 - tr_max->win_x0,
+                       OPJ_TRUE);
+        assert(ret);
+        OPJ_UNUSED(ret);
+    }
+    opj_sparse_array_int32_free(sa);
+
     opj_aligned_free(h.wavelet);
     return OPJ_TRUE;
 }
@@ -2474,9 +2847,9 @@ OPJ_BOOL opj_dwt_decode_real(opj_tcd_t *p_tcd,
                              opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
                              OPJ_UINT32 numres)
 {
-    if (opj_dwt_is_whole_tile_decoding(p_tcd, tilec, numres)) {
+    if (p_tcd->whole_tile_decoding) {
         return opj_dwt_decode_tile_97(tilec, numres);
     } else {
-        return opj_dwt_decode_partial_97(p_tcd, tilec, numres);
+        return opj_dwt_decode_partial_97(tilec, numres);
     }
 }
diff --git a/src/lib/openjp2/dwt.h b/src/lib/openjp2/dwt.h
index a66ac71e0..4f63e524a 100644
--- a/src/lib/openjp2/dwt.h
+++ b/src/lib/openjp2/dwt.h
@@ -63,7 +63,7 @@ OPJ_BOOL opj_dwt_encode(opj_tcd_tilecomp_t * tilec);
 /**
 Inverse 5-3 wavelet transform in 2-D.
 Apply a reversible inverse DWT transform to a component of an image.
-@param tcd TCD handle
+@param p_tcd TCD handle
 @param tilec Tile component information (current tile)
 @param numres Number of resolution levels to decode
 */
@@ -93,7 +93,7 @@ OPJ_BOOL opj_dwt_encode_real(opj_tcd_tilecomp_t * tilec);
 /**
 Inverse 9-7 wavelet transform in 2-D.
 Apply an irreversible inverse DWT transform to a component of an image.
-@param tcd TCD handle
+@param p_tcd TCD handle
 @param tilec Tile component information (current tile)
 @param numres Number of resolution levels to decode
 */
diff --git a/src/lib/openjp2/j2k.c b/src/lib/openjp2/j2k.c
index 4fd65872a..8e4b39666 100644
--- a/src/lib/openjp2/j2k.c
+++ b/src/lib/openjp2/j2k.c
@@ -49,8 +49,6 @@
 /** @name Local static functions */
 /*@{*/
 
-#define OPJ_UNUSED(x) (void)x
-
 /**
  * Sets up the procedures to do on reading header. Developpers wanting to extend the library can add their own reading procedures.
  */
@@ -371,7 +369,7 @@ static OPJ_BOOL opj_j2k_pre_write_tile(opj_j2k_t * p_j2k,
                                        opj_stream_private_t *p_stream,
                                        opj_event_mgr_t * p_manager);
 
-static OPJ_BOOL opj_j2k_update_image_data(opj_tcd_t * p_tcd, OPJ_BYTE * p_data,
+static OPJ_BOOL opj_j2k_update_image_data(opj_tcd_t * p_tcd,
         opj_image_t* p_output_image);
 
 static void opj_get_tile_dimensions(opj_image_t * l_image,
@@ -2144,13 +2142,6 @@ static OPJ_BOOL opj_j2k_read_siz(opj_j2k_t *p_j2k,
         return OPJ_FALSE;
     }
 
-    /* testcase 1610.pdf.SIGSEGV.59c.681 */
-    if ((0xFFFFFFFFU / l_image->x1) < l_image->y1) {
-        opj_event_msg(p_manager, EVT_ERROR,
-                      "Prevent buffer overflow (x1: %d, y1: %d)\n", l_image->x1, l_image->y1);
-        return OPJ_FALSE;
-    }
-
     /* testcase issue427-illegal-tile-offset.jp2 */
     l_tx1 = opj_uint_adds(l_cp->tx0, l_cp->tdx); /* manage overflow */
     l_ty1 = opj_uint_adds(l_cp->ty0, l_cp->tdy); /* manage overflow */
@@ -6730,6 +6721,7 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k,
 {
     OPJ_UINT32 i, j, tileno, numpocs_tile;
     opj_cp_t *cp = 00;
+    OPJ_UINT32 cblkw, cblkh;
 
     if (!p_j2k || !parameters || ! image) {
         return OPJ_FALSE;
@@ -6743,6 +6735,38 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k,
         return OPJ_FALSE;
     }
 
+    if (parameters->cblockw_init < 4 || parameters->cblockw_init > 1024) {
+        opj_event_msg(p_manager, EVT_ERROR,
+                      "Invalid value for cblockw_init: %d not a power of 2 in range [4,1024]\n",
+                      parameters->cblockw_init);
+        return OPJ_FALSE;
+    }
+    if (parameters->cblockh_init < 4 || parameters->cblockh_init > 1024) {
+        opj_event_msg(p_manager, EVT_ERROR,
+                      "Invalid value for cblockh_init: %d not a power of 2 in range [4,1024]\n",
+                      parameters->cblockh_init);
+        return OPJ_FALSE;
+    }
+    if (parameters->cblockw_init * parameters->cblockh_init > 4096) {
+        opj_event_msg(p_manager, EVT_ERROR,
+                      "Invalid value for cblockw_init * cblockh_init: should be <= 4096\n");
+        return OPJ_FALSE;
+    }
+    cblkw = (OPJ_UINT32)opj_int_floorlog2(parameters->cblockw_init);
+    cblkh = (OPJ_UINT32)opj_int_floorlog2(parameters->cblockh_init);
+    if (parameters->cblockw_init != (1 << cblkw)) {
+        opj_event_msg(p_manager, EVT_ERROR,
+                      "Invalid value for cblockw_init: %d not a power of 2 in range [4,1024]\n",
+                      parameters->cblockw_init);
+        return OPJ_FALSE;
+    }
+    if (parameters->cblockh_init != (1 << cblkh)) {
+        opj_event_msg(p_manager, EVT_ERROR,
+                      "Invalid value for cblockh_init: %d not a power of 2 in range [4,1024]\n",
+                      parameters->cblockh_init);
+        return OPJ_FALSE;
+    }
+
     /* keep a link to cp so that we can destroy it later in j2k_destroy_compress */
     cp = &(p_j2k->m_cp);
 
@@ -8756,9 +8780,13 @@ OPJ_BOOL opj_j2k_read_tile_header(opj_j2k_t * p_j2k,
 
     *p_tile_index = p_j2k->m_current_tile_number;
     *p_go_on = OPJ_TRUE;
-    *p_data_size = opj_tcd_get_decoded_tile_size(p_j2k->m_tcd);
-    if (*p_data_size == UINT_MAX) {
-        return OPJ_FALSE;
+    if (p_data_size) {
+        /* For internal use in j2k.c, we don't need this */
+        /* This is just needed for folks using the opj_read_tile_header() / opj_decode_tile_data() combo */
+        *p_data_size = opj_tcd_get_decoded_tile_size(p_j2k->m_tcd, OPJ_FALSE);
+        if (*p_data_size == UINT_MAX) {
+            return OPJ_FALSE;
+        }
     }
     *p_tile_x0 = p_j2k->m_tcd->tcd_image->tiles->x0;
     *p_tile_y0 = p_j2k->m_tcd->tcd_image->tiles->y0;
@@ -8869,26 +8897,24 @@ OPJ_BOOL opj_j2k_decode_tile(opj_j2k_t * p_j2k,
     return OPJ_TRUE;
 }
 
-static OPJ_BOOL opj_j2k_update_image_data(opj_tcd_t * p_tcd, OPJ_BYTE * p_data,
+static OPJ_BOOL opj_j2k_update_image_data(opj_tcd_t * p_tcd,
         opj_image_t* p_output_image)
 {
-    OPJ_UINT32 i, j, k = 0;
+    OPJ_UINT32 i, j;
     OPJ_UINT32 l_width_src, l_height_src;
     OPJ_UINT32 l_width_dest, l_height_dest;
     OPJ_INT32 l_offset_x0_src, l_offset_y0_src, l_offset_x1_src, l_offset_y1_src;
-    OPJ_SIZE_T l_start_offset_src, l_line_offset_src, l_end_offset_src ;
+    OPJ_SIZE_T l_start_offset_src;
     OPJ_UINT32 l_start_x_dest, l_start_y_dest;
     OPJ_UINT32 l_x0_dest, l_y0_dest, l_x1_dest, l_y1_dest;
-    OPJ_SIZE_T l_start_offset_dest, l_line_offset_dest;
+    OPJ_SIZE_T l_start_offset_dest;
 
     opj_image_comp_t * l_img_comp_src = 00;
     opj_image_comp_t * l_img_comp_dest = 00;
 
     opj_tcd_tilecomp_t * l_tilec = 00;
     opj_image_t * l_image_src = 00;
-    OPJ_UINT32 l_size_comp, l_remaining;
     OPJ_INT32 * l_dest_ptr;
-    opj_tcd_resolution_t* l_res = 00;
 
     l_tilec = p_tcd->tcd_image->tiles->comps;
     l_image_src = p_tcd->image;
@@ -8896,53 +8922,47 @@ static OPJ_BOOL opj_j2k_update_image_data(opj_tcd_t * p_tcd, OPJ_BYTE * p_data,
 
     l_img_comp_dest = p_output_image->comps;
 
-    for (i = 0; i < l_image_src->numcomps; i++) {
-
-        /* Allocate output component buffer if necessary */
-        if (!l_img_comp_dest->data) {
-            OPJ_SIZE_T l_width = l_img_comp_dest->w;
-            OPJ_SIZE_T l_height = l_img_comp_dest->h;
-
-            if ((l_height == 0U) || (l_width > (SIZE_MAX / l_height)) ||
-                    l_width * l_height > SIZE_MAX / sizeof(OPJ_INT32)) {
-                /* would overflow */
-                return OPJ_FALSE;
-            }
-            l_img_comp_dest->data = (OPJ_INT32*) opj_image_data_alloc(l_width * l_height *
-                                    sizeof(OPJ_INT32));
-            if (! l_img_comp_dest->data) {
-                return OPJ_FALSE;
-            }
-            /* Do we really need this memset ? */
-            memset(l_img_comp_dest->data, 0, l_width * l_height * sizeof(OPJ_INT32));
-        }
+    for (i = 0; i < l_image_src->numcomps;
+            i++, ++l_img_comp_dest, ++l_img_comp_src,  ++l_tilec) {
+        OPJ_INT32 res_x0, res_x1, res_y0, res_y1;
+        OPJ_UINT32 src_data_stride;
+        const OPJ_INT32* p_src_data;
 
         /* Copy info from decoded comp image to output image */
         l_img_comp_dest->resno_decoded = l_img_comp_src->resno_decoded;
 
-        /*-----*/
-        /* Compute the precision of the output buffer */
-        l_size_comp = l_img_comp_src->prec >> 3; /*(/ 8)*/
-        l_remaining = l_img_comp_src->prec & 7;  /* (%8) */
-        l_res = l_tilec->resolutions + l_img_comp_src->resno_decoded;
-
-        if (l_remaining) {
-            ++l_size_comp;
+        if (p_tcd->whole_tile_decoding) {
+            opj_tcd_resolution_t* l_res = l_tilec->resolutions +
+                                          l_img_comp_src->resno_decoded;
+            res_x0 = l_res->x0;
+            res_y0 = l_res->y0;
+            res_x1 = l_res->x1;
+            res_y1 = l_res->y1;
+            src_data_stride = (OPJ_UINT32)(
+                                  l_tilec->resolutions[l_tilec->minimum_num_resolutions - 1].x1 -
+                                  l_tilec->resolutions[l_tilec->minimum_num_resolutions - 1].x0);
+            p_src_data = l_tilec->data;
+        } else {
+            opj_tcd_resolution_t* l_res = l_tilec->resolutions +
+                                          l_img_comp_src->resno_decoded;
+            res_x0 = (OPJ_INT32)l_res->win_x0;
+            res_y0 = (OPJ_INT32)l_res->win_y0;
+            res_x1 = (OPJ_INT32)l_res->win_x1;
+            res_y1 = (OPJ_INT32)l_res->win_y1;
+            src_data_stride = l_res->win_x1 - l_res->win_x0;
+            p_src_data = l_tilec->data_win;
         }
 
-        if (l_size_comp == 3) {
-            l_size_comp = 4;
-        }
-        /*-----*/
+        l_width_src = (OPJ_UINT32)(res_x1 - res_x0);
+        l_height_src = (OPJ_UINT32)(res_y1 - res_y0);
+
 
         /* Current tile component size*/
         /*if (i == 0) {
         fprintf(stdout, "SRC: l_res_x0=%d, l_res_x1=%d, l_res_y0=%d, l_res_y1=%d\n",
-                        l_res->x0, l_res->x1, l_res->y0, l_res->y1);
+                        res_x0, res_x1, res_y0, res_y1);
         }*/
 
-        l_width_src = (OPJ_UINT32)(l_res->x1 - l_res->x0);
-        l_height_src = (OPJ_UINT32)(l_res->y1 - l_res->y0);
 
         /* Border of the current output component*/
         l_x0_dest = opj_uint_ceildivpow2(l_img_comp_dest->x0, l_img_comp_dest->factor);
@@ -8963,53 +8983,53 @@ static OPJ_BOOL opj_j2k_update_image_data(opj_tcd_t * p_tcd, OPJ_BYTE * p_data,
          * l_start_y_dest, l_width_dest, l_height_dest)  which will be modified
          * by this input area.
          * */
-        assert(l_res->x0 >= 0);
-        assert(l_res->x1 >= 0);
-        if (l_x0_dest < (OPJ_UINT32)l_res->x0) {
-            l_start_x_dest = (OPJ_UINT32)l_res->x0 - l_x0_dest;
+        assert(res_x0 >= 0);
+        assert(res_x1 >= 0);
+        if (l_x0_dest < (OPJ_UINT32)res_x0) {
+            l_start_x_dest = (OPJ_UINT32)res_x0 - l_x0_dest;
             l_offset_x0_src = 0;
 
-            if (l_x1_dest >= (OPJ_UINT32)l_res->x1) {
+            if (l_x1_dest >= (OPJ_UINT32)res_x1) {
                 l_width_dest = l_width_src;
                 l_offset_x1_src = 0;
             } else {
-                l_width_dest = l_x1_dest - (OPJ_UINT32)l_res->x0 ;
+                l_width_dest = l_x1_dest - (OPJ_UINT32)res_x0 ;
                 l_offset_x1_src = (OPJ_INT32)(l_width_src - l_width_dest);
             }
         } else {
             l_start_x_dest = 0U;
-            l_offset_x0_src = (OPJ_INT32)l_x0_dest - l_res->x0;
+            l_offset_x0_src = (OPJ_INT32)l_x0_dest - res_x0;
 
-            if (l_x1_dest >= (OPJ_UINT32)l_res->x1) {
+            if (l_x1_dest >= (OPJ_UINT32)res_x1) {
                 l_width_dest = l_width_src - (OPJ_UINT32)l_offset_x0_src;
                 l_offset_x1_src = 0;
             } else {
                 l_width_dest = l_img_comp_dest->w ;
-                l_offset_x1_src = l_res->x1 - (OPJ_INT32)l_x1_dest;
+                l_offset_x1_src = res_x1 - (OPJ_INT32)l_x1_dest;
             }
         }
 
-        if (l_y0_dest < (OPJ_UINT32)l_res->y0) {
-            l_start_y_dest = (OPJ_UINT32)l_res->y0 - l_y0_dest;
+        if (l_y0_dest < (OPJ_UINT32)res_y0) {
+            l_start_y_dest = (OPJ_UINT32)res_y0 - l_y0_dest;
             l_offset_y0_src = 0;
 
-            if (l_y1_dest >= (OPJ_UINT32)l_res->y1) {
+            if (l_y1_dest >= (OPJ_UINT32)res_y1) {
                 l_height_dest = l_height_src;
                 l_offset_y1_src = 0;
             } else {
-                l_height_dest = l_y1_dest - (OPJ_UINT32)l_res->y0 ;
+                l_height_dest = l_y1_dest - (OPJ_UINT32)res_y0 ;
                 l_offset_y1_src = (OPJ_INT32)(l_height_src - l_height_dest);
             }
         } else {
             l_start_y_dest = 0U;
-            l_offset_y0_src = (OPJ_INT32)l_y0_dest - l_res->y0;
+            l_offset_y0_src = (OPJ_INT32)l_y0_dest - res_y0;
 
-            if (l_y1_dest >= (OPJ_UINT32)l_res->y1) {
+            if (l_y1_dest >= (OPJ_UINT32)res_y1) {
                 l_height_dest = l_height_src - (OPJ_UINT32)l_offset_y0_src;
                 l_offset_y1_src = 0;
             } else {
                 l_height_dest = l_img_comp_dest->h ;
-                l_offset_y1_src = l_res->y1 - (OPJ_INT32)l_y1_dest;
+                l_offset_y1_src = res_y1 - (OPJ_INT32)l_y1_dest;
             }
         }
 
@@ -9025,119 +9045,64 @@ static OPJ_BOOL opj_j2k_update_image_data(opj_tcd_t * p_tcd, OPJ_BYTE * p_data,
 
         /* Compute the input buffer offset */
         l_start_offset_src = (OPJ_SIZE_T)l_offset_x0_src + (OPJ_SIZE_T)l_offset_y0_src
-                             * (OPJ_SIZE_T)l_width_src;
-        l_line_offset_src  = (OPJ_SIZE_T)l_offset_x1_src + (OPJ_SIZE_T)l_offset_x0_src;
-        l_end_offset_src   = (OPJ_SIZE_T)l_offset_y1_src * (OPJ_SIZE_T)l_width_src -
-                             (OPJ_SIZE_T)l_offset_x0_src;
+                             * (OPJ_SIZE_T)src_data_stride;
 
         /* Compute the output buffer offset */
         l_start_offset_dest = (OPJ_SIZE_T)l_start_x_dest + (OPJ_SIZE_T)l_start_y_dest
                               * (OPJ_SIZE_T)l_img_comp_dest->w;
-        l_line_offset_dest  = (OPJ_SIZE_T)l_img_comp_dest->w - (OPJ_SIZE_T)l_width_dest;
 
-        /* Move the output buffer to the first place where we will write*/
-        l_dest_ptr = l_img_comp_dest->data + l_start_offset_dest;
-
-        /*if (i == 0) {
-                fprintf(stdout, "COMPO[%d]:\n",i);
-                fprintf(stdout, "SRC: l_start_x_src=%d, l_start_y_src=%d, l_width_src=%d, l_height_src=%d\n"
-                                "\t tile offset:%d, %d, %d, %d\n"
-                                "\t buffer offset: %d; %d, %d\n",
-                                l_res->x0, l_res->y0, l_width_src, l_height_src,
-                                l_offset_x0_src, l_offset_y0_src, l_offset_x1_src, l_offset_y1_src,
-                                l_start_offset_src, l_line_offset_src, l_end_offset_src);
-
-                fprintf(stdout, "DEST: l_start_x_dest=%d, l_start_y_dest=%d, l_width_dest=%d, l_height_dest=%d\n"
-                                "\t start offset: %d, line offset= %d\n",
-                                l_start_x_dest, l_start_y_dest, l_width_dest, l_height_dest, l_start_offset_dest, l_line_offset_dest);
-        }*/
-
-        switch (l_size_comp) {
-        case 1: {
-            OPJ_CHAR * l_src_ptr = (OPJ_CHAR*) p_data;
-            l_src_ptr += l_start_offset_src; /* Move to the first place where we will read*/
-
-            if (l_img_comp_src->sgnd) {
-                for (j = 0 ; j < l_height_dest ; ++j) {
-                    for (k = 0 ; k < l_width_dest ; ++k) {
-                        *(l_dest_ptr++) = (OPJ_INT32)(*
-                                                      (l_src_ptr++));  /* Copy only the data needed for the output image */
-                    }
-
-                    l_dest_ptr +=
-                        l_line_offset_dest; /* Move to the next place where we will write */
-                    l_src_ptr += l_line_offset_src ; /* Move to the next place where we will read */
-                }
+        /* Allocate output component buffer if necessary */
+        if (l_img_comp_dest->data == NULL && l_start_offset_src == 0 &&
+                l_start_offset_dest == 0 && l_width_dest == src_data_stride &&
+                l_width_dest == l_img_comp_dest->w &&
+                l_height_dest == l_img_comp_dest->h) {
+            /* The tile buffer has exactly the output layout (same size and */
+            /* same row stride): borrow it directly to save a copy */
+            if (p_tcd->whole_tile_decoding) {
+                l_img_comp_dest->data = l_tilec->data;
+                l_tilec->data = NULL;
             } else {
-                for (j = 0 ; j < l_height_dest ; ++j) {
-                    for (k = 0 ; k < l_width_dest ; ++k) {
-                        *(l_dest_ptr++) = (OPJ_INT32)((*(l_src_ptr++)) & 0xff);
-                    }
+                l_img_comp_dest->data = l_tilec->data_win;
+                l_tilec->data_win = NULL;
+            }
+            continue;
+        } else if (l_img_comp_dest->data == NULL) {
+            OPJ_SIZE_T l_width = l_img_comp_dest->w;
+            OPJ_SIZE_T l_height = l_img_comp_dest->h;
 
-                    l_dest_ptr += l_line_offset_dest;
-                    l_src_ptr += l_line_offset_src;
-                }
+            if ((l_height == 0U) || (l_width > (SIZE_MAX / l_height)) ||
+                    l_width * l_height > SIZE_MAX / sizeof(OPJ_INT32)) {
+                /* would overflow */
+                return OPJ_FALSE;
+            }
+            l_img_comp_dest->data = (OPJ_INT32*) opj_image_data_alloc(l_width * l_height *
+                                    sizeof(OPJ_INT32));
+            if (! l_img_comp_dest->data) {
+                return OPJ_FALSE;
             }
 
-            l_src_ptr +=
-                l_end_offset_src; /* Move to the end of this component-part of the input buffer */
-            p_data = (OPJ_BYTE*)
-                     l_src_ptr; /* Keep the current position for the next component-part */
+            if (l_img_comp_dest->w != l_width_dest ||
+                    l_img_comp_dest->h != l_height_dest) {
+                memset(l_img_comp_dest->data, 0,
+                       (OPJ_SIZE_T)l_img_comp_dest->w * l_img_comp_dest->h * sizeof(OPJ_INT32));
+            }
         }
-        break;
-        case 2: {
-            OPJ_INT16 * l_src_ptr = (OPJ_INT16 *) p_data;
-            l_src_ptr += l_start_offset_src;
-
-            if (l_img_comp_src->sgnd) {
-                for (j = 0; j < l_height_dest; ++j) {
-                    for (k = 0; k < l_width_dest; ++k) {
-                        OPJ_INT16 val;
-                        memcpy(&val, l_src_ptr, sizeof(val));
-                        l_src_ptr ++;
-                        *(l_dest_ptr++) = val;
-                    }
-
-                    l_dest_ptr += l_line_offset_dest;
-                    l_src_ptr += l_line_offset_src ;
-                }
-            } else {
-                for (j = 0; j < l_height_dest; ++j) {
-                    for (k = 0; k < l_width_dest; ++k) {
-                        OPJ_INT16 val;
-                        memcpy(&val, l_src_ptr, sizeof(val));
-                        l_src_ptr ++;
-                        *(l_dest_ptr++) = val & 0xffff;
-                    }
 
-                    l_dest_ptr += l_line_offset_dest;
-                    l_src_ptr += l_line_offset_src ;
-                }
-            }
+        /* Move the output buffer to the first place where we will write*/
+        l_dest_ptr = l_img_comp_dest->data + l_start_offset_dest;
 
-            l_src_ptr += l_end_offset_src;
-            p_data = (OPJ_BYTE*) l_src_ptr;
-        }
-        break;
-        case 4: {
-            OPJ_INT32 * l_src_ptr = (OPJ_INT32 *) p_data;
+        {
+            const OPJ_INT32 * l_src_ptr = p_src_data;
             l_src_ptr += l_start_offset_src;
 
             for (j = 0; j < l_height_dest; ++j) {
                 memcpy(l_dest_ptr, l_src_ptr, l_width_dest * sizeof(OPJ_INT32));
-                l_dest_ptr += l_width_dest + l_line_offset_dest;
-                l_src_ptr += l_width_dest + l_line_offset_src ;
+                l_dest_ptr += l_img_comp_dest->w;
+                l_src_ptr += src_data_stride;
             }
-
-            l_src_ptr += l_end_offset_src;
-            p_data = (OPJ_BYTE*) l_src_ptr;
-        }
-        break;
         }
 
-        ++l_img_comp_dest;
-        ++l_img_comp_src;
-        ++l_tilec;
+
     }
 
     return OPJ_TRUE;
@@ -9199,10 +9164,15 @@ OPJ_BOOL opj_j2k_set_decode_area(opj_j2k_t *p_j2k,
     OPJ_BOOL ret;
     OPJ_UINT32 it_comp;
 
+    if (p_j2k->m_cp.tw == 1 && p_j2k->m_cp.th == 1 &&
+            p_j2k->m_cp.tcps[0].m_data != NULL) {
+        /* In the case of a single-tiled image whose codestream we have already */
+        /* ingested (tile data still held in m_data), go on */
+    }
     /* Check if we are read the main header */
-    if (p_j2k->m_specific_param.m_decoder.m_state != J2K_STATE_TPHSOT) {
+    else if (p_j2k->m_specific_param.m_decoder.m_state != J2K_STATE_TPHSOT) {
         opj_event_msg(p_manager, EVT_ERROR,
-                      "Need to decode the main header before begin to decode the remaining codestream");
+                      "Need to decode the main header before begin to decode the remaining codestream.\n");
         return OPJ_FALSE;
     }
 
@@ -10515,10 +10485,8 @@ static OPJ_BOOL opj_j2k_decode_tiles(opj_j2k_t *p_j2k,
 {
     OPJ_BOOL l_go_on = OPJ_TRUE;
     OPJ_UINT32 l_current_tile_no;
-    OPJ_UINT32 l_data_size, l_max_data_size;
     OPJ_INT32 l_tile_x0, l_tile_y0, l_tile_x1, l_tile_y1;
     OPJ_UINT32 l_nb_comps;
-    OPJ_BYTE * l_current_data;
     OPJ_UINT32 nr_tiles = 0;
 
     /* Particular case for whole single tile decoding */
@@ -10528,12 +10496,11 @@ static OPJ_BOOL opj_j2k_decode_tiles(opj_j2k_t *p_j2k,
             p_j2k->m_output_image->x0 == 0 &&
             p_j2k->m_output_image->y0 == 0 &&
             p_j2k->m_output_image->x1 == p_j2k->m_cp.tdx &&
-            p_j2k->m_output_image->y1 == p_j2k->m_cp.tdy &&
-            p_j2k->m_output_image->comps[0].factor == 0) {
+            p_j2k->m_output_image->y1 == p_j2k->m_cp.tdy) {
         OPJ_UINT32 i;
         if (! opj_j2k_read_tile_header(p_j2k,
                                        &l_current_tile_no,
-                                       &l_data_size,
+                                       NULL,
                                        &l_tile_x0, &l_tile_y0,
                                        &l_tile_x1, &l_tile_y1,
                                        &l_nb_comps,
@@ -10562,59 +10529,55 @@ static OPJ_BOOL opj_j2k_decode_tiles(opj_j2k_t *p_j2k,
         return OPJ_TRUE;
     }
 
-    l_current_data = (OPJ_BYTE*)opj_malloc(1000);
-    if (! l_current_data) {
-        opj_event_msg(p_manager, EVT_ERROR, "Not enough memory to decode tiles\n");
-        return OPJ_FALSE;
-    }
-    l_max_data_size = 1000;
-
     for (;;) {
-        if (! opj_j2k_read_tile_header(p_j2k,
-                                       &l_current_tile_no,
-                                       &l_data_size,
-                                       &l_tile_x0, &l_tile_y0,
-                                       &l_tile_x1, &l_tile_y1,
-                                       &l_nb_comps,
-                                       &l_go_on,
-                                       p_stream,
-                                       p_manager)) {
-            opj_free(l_current_data);
-            return OPJ_FALSE;
-        }
-
-        if (! l_go_on) {
-            break;
-        }
-
-        if (l_data_size > l_max_data_size) {
-            OPJ_BYTE *l_new_current_data = (OPJ_BYTE *) opj_realloc(l_current_data,
-                                           l_data_size);
-            if (! l_new_current_data) {
-                opj_free(l_current_data);
-                opj_event_msg(p_manager, EVT_ERROR, "Not enough memory to decode tile %d/%d\n",
-                              l_current_tile_no + 1, p_j2k->m_cp.th * p_j2k->m_cp.tw);
+        if (p_j2k->m_cp.tw == 1 && p_j2k->m_cp.th == 1 &&
+                p_j2k->m_cp.tcps[0].m_data != NULL) {
+            l_current_tile_no = 0;
+            p_j2k->m_current_tile_number = 0;
+            p_j2k->m_specific_param.m_decoder.m_state |= J2K_STATE_DATA;
+        } else {
+            if (! opj_j2k_read_tile_header(p_j2k,
+                                           &l_current_tile_no,
+                                           NULL,
+                                           &l_tile_x0, &l_tile_y0,
+                                           &l_tile_x1, &l_tile_y1,
+                                           &l_nb_comps,
+                                           &l_go_on,
+                                           p_stream,
+                                           p_manager)) {
                 return OPJ_FALSE;
             }
-            l_current_data = l_new_current_data;
-            l_max_data_size = l_data_size;
+
+            if (! l_go_on) {
+                break;
+            }
         }
 
-        if (! opj_j2k_decode_tile(p_j2k, l_current_tile_no, l_current_data, l_data_size,
+        if (! opj_j2k_decode_tile(p_j2k, l_current_tile_no, NULL, 0,
                                   p_stream, p_manager)) {
-            opj_free(l_current_data);
             opj_event_msg(p_manager, EVT_ERROR, "Failed to decode tile %d/%d\n",
                           l_current_tile_no + 1, p_j2k->m_cp.th * p_j2k->m_cp.tw);
             return OPJ_FALSE;
         }
+
         opj_event_msg(p_manager, EVT_INFO, "Tile %d/%d has been decoded.\n",
                       l_current_tile_no + 1, p_j2k->m_cp.th * p_j2k->m_cp.tw);
 
-        if (! opj_j2k_update_image_data(p_j2k->m_tcd, l_current_data,
+        if (! opj_j2k_update_image_data(p_j2k->m_tcd,
                                         p_j2k->m_output_image)) {
-            opj_free(l_current_data);
             return OPJ_FALSE;
         }
+
+        if (p_j2k->m_cp.tw == 1 && p_j2k->m_cp.th == 1 &&
+                !(p_j2k->m_output_image->x0 == p_j2k->m_private_image->x0 &&
+                  p_j2k->m_output_image->y0 == p_j2k->m_private_image->y0 &&
+                  p_j2k->m_output_image->x1 == p_j2k->m_private_image->x1 &&
+                  p_j2k->m_output_image->y1 == p_j2k->m_private_image->y1)) {
+            /* Keep current tcp data */
+        } else {
+            opj_j2k_tcp_data_destroy(&p_j2k->m_cp.tcps[l_current_tile_no]);
+        }
+
         opj_event_msg(p_manager, EVT_INFO,
                       "Image data has been updated with tile %d.\n\n", l_current_tile_no + 1);
 
@@ -10627,8 +10590,6 @@ static OPJ_BOOL opj_j2k_decode_tiles(opj_j2k_t *p_j2k,
         }
     }
 
-    opj_free(l_current_data);
-
     return OPJ_TRUE;
 }
 
@@ -10661,24 +10622,14 @@ static OPJ_BOOL opj_j2k_decode_one_tile(opj_j2k_t *p_j2k,
     OPJ_BOOL l_go_on = OPJ_TRUE;
     OPJ_UINT32 l_current_tile_no;
     OPJ_UINT32 l_tile_no_to_dec;
-    OPJ_UINT32 l_data_size, l_max_data_size;
     OPJ_INT32 l_tile_x0, l_tile_y0, l_tile_x1, l_tile_y1;
     OPJ_UINT32 l_nb_comps;
-    OPJ_BYTE * l_current_data;
     OPJ_UINT32 l_nb_tiles;
     OPJ_UINT32 i;
 
-    l_current_data = (OPJ_BYTE*)opj_malloc(1000);
-    if (! l_current_data) {
-        opj_event_msg(p_manager, EVT_ERROR, "Not enough memory to decode one tile\n");
-        return OPJ_FALSE;
-    }
-    l_max_data_size = 1000;
-
     /*Allocate and initialize some elements of codestrem index if not already done*/
     if (!p_j2k->cstr_index->tile_index) {
         if (!opj_j2k_allocate_tile_element_cstr_index(p_j2k)) {
-            opj_free(l_current_data);
             return OPJ_FALSE;
         }
     }
@@ -10693,7 +10644,6 @@ static OPJ_BOOL opj_j2k_decode_one_tile(opj_j2k_t *p_j2k,
                 if (!(opj_stream_read_seek(p_stream,
                                            p_j2k->m_specific_param.m_decoder.m_last_sot_read_pos + 2, p_manager))) {
                     opj_event_msg(p_manager, EVT_ERROR, "Problem with seek function\n");
-                    opj_free(l_current_data);
                     return OPJ_FALSE;
                 }
             } else {
@@ -10701,7 +10651,6 @@ static OPJ_BOOL opj_j2k_decode_one_tile(opj_j2k_t *p_j2k,
                                            p_j2k->cstr_index->tile_index[l_tile_no_to_dec].tp_index[0].start_pos + 2,
                                            p_manager))) {
                     opj_event_msg(p_manager, EVT_ERROR, "Problem with seek function\n");
-                    opj_free(l_current_data);
                     return OPJ_FALSE;
                 }
             }
@@ -10723,14 +10672,13 @@ static OPJ_BOOL opj_j2k_decode_one_tile(opj_j2k_t *p_j2k,
     for (;;) {
         if (! opj_j2k_read_tile_header(p_j2k,
                                        &l_current_tile_no,
-                                       &l_data_size,
+                                       NULL,
                                        &l_tile_x0, &l_tile_y0,
                                        &l_tile_x1, &l_tile_y1,
                                        &l_nb_comps,
                                        &l_go_on,
                                        p_stream,
                                        p_manager)) {
-            opj_free(l_current_data);
             return OPJ_FALSE;
         }
 
@@ -10738,33 +10686,19 @@ static OPJ_BOOL opj_j2k_decode_one_tile(opj_j2k_t *p_j2k,
             break;
         }
 
-        if (l_data_size > l_max_data_size) {
-            OPJ_BYTE *l_new_current_data = (OPJ_BYTE *) opj_realloc(l_current_data,
-                                           l_data_size);
-            if (! l_new_current_data) {
-                opj_free(l_current_data);
-                l_current_data = NULL;
-                opj_event_msg(p_manager, EVT_ERROR, "Not enough memory to decode tile %d/%d\n",
-                              l_current_tile_no + 1, p_j2k->m_cp.th * p_j2k->m_cp.tw);
-                return OPJ_FALSE;
-            }
-            l_current_data = l_new_current_data;
-            l_max_data_size = l_data_size;
-        }
-
-        if (! opj_j2k_decode_tile(p_j2k, l_current_tile_no, l_current_data, l_data_size,
+        if (! opj_j2k_decode_tile(p_j2k, l_current_tile_no, NULL, 0,
                                   p_stream, p_manager)) {
-            opj_free(l_current_data);
             return OPJ_FALSE;
         }
         opj_event_msg(p_manager, EVT_INFO, "Tile %d/%d has been decoded.\n",
                       l_current_tile_no + 1, p_j2k->m_cp.th * p_j2k->m_cp.tw);
 
-        if (! opj_j2k_update_image_data(p_j2k->m_tcd, l_current_data,
+        if (! opj_j2k_update_image_data(p_j2k->m_tcd,
                                         p_j2k->m_output_image)) {
-            opj_free(l_current_data);
             return OPJ_FALSE;
         }
+        opj_j2k_tcp_data_destroy(&p_j2k->m_cp.tcps[l_current_tile_no]);
+
         opj_event_msg(p_manager, EVT_INFO,
                       "Image data has been updated with tile %d.\n\n", l_current_tile_no + 1);
 
@@ -10773,7 +10707,6 @@ static OPJ_BOOL opj_j2k_decode_one_tile(opj_j2k_t *p_j2k,
             if (!(opj_stream_read_seek(p_stream, p_j2k->cstr_index->main_head_end + 2,
                                        p_manager))) {
                 opj_event_msg(p_manager, EVT_ERROR, "Problem with seek function\n");
-                opj_free(l_current_data);
                 return OPJ_FALSE;
             }
             break;
@@ -10785,8 +10718,6 @@ static OPJ_BOOL opj_j2k_decode_one_tile(opj_j2k_t *p_j2k,
 
     }
 
-    opj_free(l_current_data);
-
     return OPJ_TRUE;
 }
 
@@ -10845,9 +10776,11 @@ OPJ_BOOL opj_j2k_decode(opj_j2k_t * p_j2k,
         }
     }
 
-    p_j2k->m_output_image = opj_image_create0();
-    if (!(p_j2k->m_output_image)) {
-        return OPJ_FALSE;
+    if (p_j2k->m_output_image == NULL) {
+        p_j2k->m_output_image = opj_image_create0();
+        if (!(p_j2k->m_output_image)) {
+            return OPJ_FALSE;
+        }
     }
     opj_copy_image_header(p_image, p_j2k->m_output_image);
 
@@ -10867,6 +10800,7 @@ OPJ_BOOL opj_j2k_decode(opj_j2k_t * p_j2k,
     for (compno = 0; compno < p_image->numcomps; compno++) {
         p_image->comps[compno].resno_decoded =
             p_j2k->m_output_image->comps[compno].resno_decoded;
+        opj_image_data_free(p_image->comps[compno].data);
         p_image->comps[compno].data = p_j2k->m_output_image->comps[compno].data;
 #if 0
         char fn[256];
@@ -11029,7 +10963,7 @@ OPJ_BOOL opj_j2k_encode(opj_j2k_t * p_j2k,
 {
     OPJ_UINT32 i, j;
     OPJ_UINT32 l_nb_tiles;
-    OPJ_UINT32 l_max_tile_size = 0, l_current_tile_size;
+    OPJ_SIZE_T l_max_tile_size = 0, l_current_tile_size;
     OPJ_BYTE * l_current_data = 00;
     OPJ_BOOL l_reuse_data = OPJ_FALSE;
     opj_tcd_t* p_tcd = 00;
diff --git a/src/lib/openjp2/mct.c b/src/lib/openjp2/mct.c
index 8c82ee20a..b79d4b87c 100644
--- a/src/lib/openjp2/mct.c
+++ b/src/lib/openjp2/mct.c
@@ -77,7 +77,7 @@ void opj_mct_encode(
     OPJ_INT32* OPJ_RESTRICT c0,
     OPJ_INT32* OPJ_RESTRICT c1,
     OPJ_INT32* OPJ_RESTRICT c2,
-    OPJ_UINT32 n)
+    OPJ_SIZE_T n)
 {
     OPJ_SIZE_T i;
     const OPJ_SIZE_T len = n;
@@ -119,7 +119,7 @@ void opj_mct_encode(
     OPJ_INT32* OPJ_RESTRICT c0,
     OPJ_INT32* OPJ_RESTRICT c1,
     OPJ_INT32* OPJ_RESTRICT c2,
-    OPJ_UINT32 n)
+    OPJ_SIZE_T n)
 {
     OPJ_SIZE_T i;
     const OPJ_SIZE_T len = n;
@@ -146,7 +146,7 @@ void opj_mct_decode(
     OPJ_INT32* OPJ_RESTRICT c0,
     OPJ_INT32* OPJ_RESTRICT c1,
     OPJ_INT32* OPJ_RESTRICT c2,
-    OPJ_UINT32 n)
+    OPJ_SIZE_T n)
 {
     OPJ_SIZE_T i;
     const OPJ_SIZE_T len = n;
@@ -181,7 +181,7 @@ void opj_mct_decode(
     OPJ_INT32* OPJ_RESTRICT c0,
     OPJ_INT32* OPJ_RESTRICT c1,
     OPJ_INT32* OPJ_RESTRICT c2,
-    OPJ_UINT32 n)
+    OPJ_SIZE_T n)
 {
     OPJ_UINT32 i;
     for (i = 0; i < n; ++i) {
@@ -214,7 +214,7 @@ void opj_mct_encode_real(
     OPJ_INT32* OPJ_RESTRICT c0,
     OPJ_INT32* OPJ_RESTRICT c1,
     OPJ_INT32* OPJ_RESTRICT c2,
-    OPJ_UINT32 n)
+    OPJ_SIZE_T n)
 {
     OPJ_SIZE_T i;
     const OPJ_SIZE_T len = n;
@@ -359,7 +359,7 @@ void opj_mct_encode_real(
     OPJ_INT32* OPJ_RESTRICT c0,
     OPJ_INT32* OPJ_RESTRICT c1,
     OPJ_INT32* OPJ_RESTRICT c2,
-    OPJ_UINT32 n)
+    OPJ_SIZE_T n)
 {
     OPJ_UINT32 i;
     for (i = 0; i < n; ++i) {
@@ -386,7 +386,7 @@ void opj_mct_decode_real(
     OPJ_FLOAT32* OPJ_RESTRICT c0,
     OPJ_FLOAT32* OPJ_RESTRICT c1,
     OPJ_FLOAT32* OPJ_RESTRICT c2,
-    OPJ_UINT32 n)
+    OPJ_SIZE_T n)
 {
     OPJ_UINT32 i;
 #ifdef __SSE__
@@ -451,13 +451,13 @@ OPJ_FLOAT64 opj_mct_getnorm_real(OPJ_UINT32 compno)
 
 OPJ_BOOL opj_mct_encode_custom(
     OPJ_BYTE * pCodingdata,
-    OPJ_UINT32 n,
+    OPJ_SIZE_T n,
     OPJ_BYTE ** pData,
     OPJ_UINT32 pNbComp,
     OPJ_UINT32 isSigned)
 {
     OPJ_FLOAT32 * lMct = (OPJ_FLOAT32 *) pCodingdata;
-    OPJ_UINT32 i;
+    OPJ_SIZE_T i;
     OPJ_UINT32 j;
     OPJ_UINT32 k;
     OPJ_UINT32 lNbMatCoeff = pNbComp * pNbComp;
@@ -505,13 +505,13 @@ OPJ_BOOL opj_mct_encode_custom(
 
 OPJ_BOOL opj_mct_decode_custom(
     OPJ_BYTE * pDecodingData,
-    OPJ_UINT32 n,
+    OPJ_SIZE_T n,
     OPJ_BYTE ** pData,
     OPJ_UINT32 pNbComp,
     OPJ_UINT32 isSigned)
 {
     OPJ_FLOAT32 * lMct;
-    OPJ_UINT32 i;
+    OPJ_SIZE_T i;
     OPJ_UINT32 j;
     OPJ_UINT32 k;
 
diff --git a/src/lib/openjp2/mct.h b/src/lib/openjp2/mct.h
index 0ed980e89..2e37ce733 100644
--- a/src/lib/openjp2/mct.h
+++ b/src/lib/openjp2/mct.h
@@ -61,7 +61,7 @@ Apply a reversible multi-component transform to an image
 @param n Number of samples for each component
 */
 void opj_mct_encode(OPJ_INT32* OPJ_RESTRICT c0, OPJ_INT32* OPJ_RESTRICT c1,
-                    OPJ_INT32* OPJ_RESTRICT c2, OPJ_UINT32 n);
+                    OPJ_INT32* OPJ_RESTRICT c2, OPJ_SIZE_T n);
 /**
 Apply a reversible multi-component inverse transform to an image
 @param c0 Samples for luminance component
@@ -70,7 +70,7 @@ Apply a reversible multi-component inverse transform to an image
 @param n Number of samples for each component
 */
 void opj_mct_decode(OPJ_INT32* OPJ_RESTRICT c0, OPJ_INT32* OPJ_RESTRICT c1,
-                    OPJ_INT32* OPJ_RESTRICT c2, OPJ_UINT32 n);
+                    OPJ_INT32* OPJ_RESTRICT c2, OPJ_SIZE_T n);
 /**
 Get norm of the basis function used for the reversible multi-component transform
 @param compno Number of the component (0->Y, 1->U, 2->V)
@@ -86,7 +86,7 @@ Apply an irreversible multi-component transform to an image
 @param n Number of samples for each component
 */
 void opj_mct_encode_real(OPJ_INT32* OPJ_RESTRICT c0, OPJ_INT32* OPJ_RESTRICT c1,
-                         OPJ_INT32* OPJ_RESTRICT c2, OPJ_UINT32 n);
+                         OPJ_INT32* OPJ_RESTRICT c2, OPJ_SIZE_T n);
 /**
 Apply an irreversible multi-component inverse transform to an image
 @param c0 Samples for luminance component
@@ -95,7 +95,7 @@ Apply an irreversible multi-component inverse transform to an image
 @param n Number of samples for each component
 */
 void opj_mct_decode_real(OPJ_FLOAT32* OPJ_RESTRICT c0,
-                         OPJ_FLOAT32* OPJ_RESTRICT c1, OPJ_FLOAT32* OPJ_RESTRICT c2, OPJ_UINT32 n);
+                         OPJ_FLOAT32* OPJ_RESTRICT c1, OPJ_FLOAT32* OPJ_RESTRICT c2, OPJ_SIZE_T n);
 /**
 Get norm of the basis function used for the irreversible multi-component transform
 @param compno Number of the component (0->Y, 1->U, 2->V)
@@ -114,7 +114,7 @@ FIXME DOC
 */
 OPJ_BOOL opj_mct_encode_custom(
     OPJ_BYTE * p_coding_data,
-    OPJ_UINT32 n,
+    OPJ_SIZE_T n,
     OPJ_BYTE ** p_data,
     OPJ_UINT32 p_nb_comp,
     OPJ_UINT32 is_signed);
@@ -129,7 +129,7 @@ FIXME DOC
 */
 OPJ_BOOL opj_mct_decode_custom(
     OPJ_BYTE * pDecodingData,
-    OPJ_UINT32 n,
+    OPJ_SIZE_T n,
     OPJ_BYTE ** pData,
     OPJ_UINT32 pNbComp,
     OPJ_UINT32 isSigned);
diff --git a/src/lib/openjp2/openjpeg.h b/src/lib/openjp2/openjpeg.h
index 21755b48d..7020d37d2 100644
--- a/src/lib/openjp2/openjpeg.h
+++ b/src/lib/openjp2/openjpeg.h
@@ -1340,6 +1340,12 @@ OPJ_API OPJ_BOOL OPJ_CALLCONV opj_read_header(opj_stream_t *p_stream,
  * that is to say at the highest resolution level, even if requesting the image at lower
  * resolution levels.
  *
+ * Generally opj_set_decode_area() should be followed by opj_decode(), and the
+ * codec cannot be re-used.
+ * In the particular case of an image made of a single tile, several sequences of
+ * calls to opj_set_decode_area() and opj_decode() are allowed, and will bring
+ * performance improvements when reading an image by chunks.
+ *
  * @param   p_codec         the jpeg2000 codec.
  * @param   p_image         the decoded image previously setted by opj_read_header
  * @param   p_start_x       the left position of the rectangle to decode (in image coordinates).
diff --git a/src/lib/openjp2/opj_includes.h b/src/lib/openjp2/opj_includes.h
index b33e63cef..0a8628c96 100644
--- a/src/lib/openjp2/opj_includes.h
+++ b/src/lib/openjp2/opj_includes.h
@@ -216,6 +216,8 @@ static INLINE long opj_lrintf(float f)
 /* Type to use for bit-fields in internal headers */
 typedef unsigned int OPJ_BITFIELD;
 
+#define OPJ_UNUSED(x) (void)(x) /* mark x as intentionally unused */
+
 #include "opj_inttypes.h"
 #include "opj_clock.h"
 #include "opj_malloc.h"
@@ -243,6 +245,7 @@ typedef unsigned int OPJ_BITFIELD;
 #include "t2.h"
 #include "mct.h"
 #include "opj_intmath.h"
+#include "sparse_array.h"
 
 #ifdef USE_JPIP
 #include "cidx_manager.h"
diff --git a/src/lib/openjp2/opj_intmath.h b/src/lib/openjp2/opj_intmath.h
index d7d85e52c..ad1359766 100644
--- a/src/lib/openjp2/opj_intmath.h
+++ b/src/lib/openjp2/opj_intmath.h
@@ -124,6 +124,28 @@ static INLINE OPJ_INT32 opj_int_clamp(OPJ_INT32 a, OPJ_INT32 min,
     }
     return a;
 }
+
+/**
+Clamp a 64-bit integer inside the closed interval [min, max]
+@return
+<ul>
+<li>Returns min if (a < min); this test is evaluated first
+<li>Returns max if (a > max)
+<li>Returns a otherwise, i.e. if (min <= a <= max)
+</ul>
+*/
+static INLINE OPJ_INT64 opj_int64_clamp(OPJ_INT64 a, OPJ_INT64 min,
+                                        OPJ_INT64 max)
+{
+    OPJ_INT64 clamped = a;
+    if (a < min) {
+        clamped = min;
+    } else if (a > max) {
+        clamped = max;
+    }
+    return clamped;
+}
+
 /**
 @return Get absolute value of integer
 */
diff --git a/src/lib/openjp2/sparse_array.c b/src/lib/openjp2/sparse_array.c
new file mode 100644
index 000000000..6a2d8d434
--- /dev/null
+++ b/src/lib/openjp2/sparse_array.c
@@ -0,0 +1,343 @@
+/*
+ * The copyright in this software is being made available under the 2-clauses
+ * BSD License, included below. This software may be subject to other third
+ * party and contributor rights, including patent rights, and no such rights
+ * are granted under this license.
+ *
+ * Copyright (c) 2017, IntoPix SA <contact@intopix.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "opj_includes.h"
+
+
+struct opj_sparse_array_int32 {
+    OPJ_UINT32 width;           /* total width of the array */
+    OPJ_UINT32 height;          /* total height of the array */
+    OPJ_UINT32 block_width;     /* width of a block */
+    OPJ_UINT32 block_height;    /* height of a block */
+    OPJ_UINT32 block_count_hor; /* opj_uint_ceildiv(width, block_width) */
+    OPJ_UINT32 block_count_ver; /* opj_uint_ceildiv(height, block_height) */
+    OPJ_INT32** data_blocks;    /* block_count_hor * block_count_ver entries, row-major; NULL = not allocated */
+};
+
+/* Creates an empty sparse array (no block allocated yet). Returns NULL on
+ * zero dimensions, 32-bit size overflow or memory allocation failure. */
+opj_sparse_array_int32_t* opj_sparse_array_int32_create(OPJ_UINT32 width,
+        OPJ_UINT32 height,
+        OPJ_UINT32 block_width,
+        OPJ_UINT32 block_height)
+{
+    opj_sparse_array_int32_t* sa;
+    if (width == 0 || height == 0 || block_width == 0 || block_height == 0 ||
+            block_width > ((OPJ_UINT32)~0U) / block_height / sizeof(OPJ_INT32)) {
+        return NULL;
+    }
+    sa = opj_calloc(1, sizeof(opj_sparse_array_int32_t));
+    if (sa == NULL) {
+        return NULL; /* out of memory */
+    }
+    sa->width = width;
+    sa->height = height;
+    sa->block_width = block_width;
+    sa->block_height = block_height;
+    sa->block_count_hor = opj_uint_ceildiv(width, block_width);
+    sa->block_count_ver = opj_uint_ceildiv(height, block_height);
+    if (sa->block_count_hor > ((OPJ_UINT32)~0U) / sa->block_count_ver) {
+        opj_free(sa);
+        return NULL;
+    }
+    sa->data_blocks = opj_calloc(sizeof(OPJ_INT32*),
+                                 sa->block_count_hor * sa->block_count_ver);
+    if (sa->data_blocks == NULL) {
+        opj_free(sa);
+        return NULL;
+    }
+    return sa;
+}
+
+void opj_sparse_array_int32_free(opj_sparse_array_int32_t* sa)
+{
+    if (sa) { /* a NULL handle is accepted and ignored */
+        OPJ_UINT32 i;
+        for (i = 0; i < sa->block_count_hor * sa->block_count_ver; i++) {
+            if (sa->data_blocks[i]) { /* entry stays NULL until first write */
+                opj_free(sa->data_blocks[i]);
+            }
+        }
+        opj_free(sa->data_blocks); /* the table of block pointers itself */
+        opj_free(sa);
+    }
+}
+
+OPJ_BOOL opj_sparse_array_is_region_valid(const opj_sparse_array_int32_t* sa,
+        OPJ_UINT32 x0,
+        OPJ_UINT32 y0,
+        OPJ_UINT32 x1,
+        OPJ_UINT32 y1)
+{
+    return (x0 < sa->width && x0 < x1 && x1 <= sa->width) &&   /* columns */
+           (y0 < sa->height && y0 < y1 && y1 <= sa->height);   /* rows */
+}
+
+static OPJ_BOOL opj_sparse_array_int32_read_or_write( /* shared by _read() and _write() */
+    const opj_sparse_array_int32_t* sa,
+    OPJ_UINT32 x0,
+    OPJ_UINT32 y0,
+    OPJ_UINT32 x1,
+    OPJ_UINT32 y1,
+    OPJ_INT32* buf,
+    OPJ_UINT32 buf_col_stride,
+    OPJ_UINT32 buf_line_stride,
+    OPJ_BOOL forgiving, /* value returned when the region is invalid */
+    OPJ_BOOL is_read_op) /* OPJ_TRUE: array -> buf, OPJ_FALSE: buf -> array */
+{
+    OPJ_UINT32 y, block_y;
+    OPJ_UINT32 y_incr = 0;
+    const OPJ_UINT32 block_width = sa->block_width;
+
+    if (!opj_sparse_array_is_region_valid(sa, x0, y0, x1, y1)) {
+        return forgiving;
+    }
+
+    block_y = y0 / sa->block_height;
+    for (y = y0; y < y1; block_y ++, y += y_incr) { /* one pass per block row */
+        OPJ_UINT32 x, block_x;
+        OPJ_UINT32 x_incr = 0;
+        OPJ_UINT32 block_y_offset;
+        y_incr = (y == y0) ? sa->block_height - (y0 % sa->block_height) :
+                 sa->block_height;
+        block_y_offset = sa->block_height - y_incr; /* non-zero only when y == y0 */
+        y_incr = opj_uint_min(y_incr, y1 - y); /* do not go past y1 */
+        block_x = x0 / block_width;
+        for (x = x0; x < x1; block_x ++, x += x_incr) { /* one pass per block */
+            OPJ_UINT32 j;
+            OPJ_UINT32 block_x_offset;
+            OPJ_INT32* src_block;
+            x_incr = (x == x0) ? block_width - (x0 % block_width) : block_width;
+            block_x_offset = block_width - x_incr; /* non-zero only when x == x0 */
+            x_incr = opj_uint_min(x_incr, x1 - x); /* do not go past x1 */
+            src_block = sa->data_blocks[block_y * sa->block_count_hor + block_x];
+            if (is_read_op) {
+                if (src_block == NULL) { /* block never written: reads as zeros */
+                    if (buf_col_stride == 1) {
+                        OPJ_INT32* dest_ptr = buf + (y - y0) * (OPJ_SIZE_T)buf_line_stride +
+                                              (x - x0) * buf_col_stride;
+                        for (j = 0; j < y_incr; j++) {
+                            memset(dest_ptr, 0, sizeof(OPJ_INT32) * x_incr);
+                            dest_ptr += buf_line_stride;
+                        }
+                    } else {
+                        OPJ_INT32* dest_ptr = buf + (y - y0) * (OPJ_SIZE_T)buf_line_stride +
+                                              (x - x0) * buf_col_stride;
+                        for (j = 0; j < y_incr; j++) {
+                            OPJ_UINT32 k;
+                            for (k = 0; k < x_incr; k++) {
+                                dest_ptr[k * buf_col_stride] = 0;
+                            }
+                            dest_ptr += buf_line_stride;
+                        }
+                    }
+                } else {
+                    const OPJ_INT32* OPJ_RESTRICT src_ptr = src_block + block_y_offset *
+                                                            (OPJ_SIZE_T)block_width + block_x_offset;
+                    if (buf_col_stride == 1) {
+                        OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (OPJ_SIZE_T)buf_line_stride
+                                                           +
+                                                           (x - x0) * buf_col_stride;
+                        if (x_incr == 4) {
+                            /* Same code as general branch, but the compiler */
+                            /* can have an efficient memcpy() */
+                            for (j = 0; j < y_incr; j++) {
+                                memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr);
+                                dest_ptr += buf_line_stride;
+                                src_ptr += block_width;
+                            }
+                        } else {
+                            for (j = 0; j < y_incr; j++) {
+                                memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr);
+                                dest_ptr += buf_line_stride;
+                                src_ptr += block_width;
+                            }
+                        }
+                    } else {
+                        OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (OPJ_SIZE_T)buf_line_stride
+                                                           +
+                                                           (x - x0) * buf_col_stride;
+                        if (x_incr == 1) {
+                            for (j = 0; j < y_incr; j++) {
+                                *dest_ptr = *src_ptr;
+                                dest_ptr += buf_line_stride;
+                                src_ptr += block_width;
+                            }
+                        } else if (y_incr == 1 && buf_col_stride == 2) {
+                            OPJ_UINT32 k;
+                            for (k = 0; k < (x_incr & ~3U); k += 4) { /* unrolled by 4 */
+                                dest_ptr[k * buf_col_stride] = src_ptr[k];
+                                dest_ptr[(k + 1) * buf_col_stride] = src_ptr[k + 1];
+                                dest_ptr[(k + 2) * buf_col_stride] = src_ptr[k + 2];
+                                dest_ptr[(k + 3) * buf_col_stride] = src_ptr[k + 3];
+                            }
+                            for (; k < x_incr; k++) { /* remaining 0..3 samples */
+                                dest_ptr[k * buf_col_stride] = src_ptr[k];
+                            }
+                        } else if (x_incr >= 8 && buf_col_stride == 8) {
+                            for (j = 0; j < y_incr; j++) {
+                                OPJ_UINT32 k;
+                                for (k = 0; k < (x_incr & ~3U); k += 4) { /* unrolled by 4 */
+                                    dest_ptr[k * buf_col_stride] = src_ptr[k];
+                                    dest_ptr[(k + 1) * buf_col_stride] = src_ptr[k + 1];
+                                    dest_ptr[(k + 2) * buf_col_stride] = src_ptr[k + 2];
+                                    dest_ptr[(k + 3) * buf_col_stride] = src_ptr[k + 3];
+                                }
+                                for (; k < x_incr; k++) { /* remaining 0..3 samples */
+                                    dest_ptr[k * buf_col_stride] = src_ptr[k];
+                                }
+                                dest_ptr += buf_line_stride;
+                                src_ptr += block_width;
+                            }
+                        } else {
+                            /* General case */
+                            for (j = 0; j < y_incr; j++) {
+                                OPJ_UINT32 k;
+                                for (k = 0; k < x_incr; k++) {
+                                    dest_ptr[k * buf_col_stride] = src_ptr[k];
+                                }
+                                dest_ptr += buf_line_stride;
+                                src_ptr += block_width;
+                            }
+                        }
+                    }
+                }
+            } else {
+                if (src_block == NULL) { /* first write: allocate a zero-filled block */
+                    src_block = opj_calloc(1,
+                                           sa->block_width * sa->block_height * sizeof(OPJ_INT32));
+                    if (src_block == NULL) {
+                        return OPJ_FALSE; /* allocation failure, regardless of 'forgiving' */
+                    }
+                    sa->data_blocks[block_y * sa->block_count_hor + block_x] = src_block;
+                }
+
+                if (buf_col_stride == 1) {
+                    OPJ_INT32* OPJ_RESTRICT dest_ptr = src_block + block_y_offset *
+                                                       (OPJ_SIZE_T)block_width + block_x_offset;
+                    const OPJ_INT32* OPJ_RESTRICT src_ptr = buf + (y - y0) *
+                                                            (OPJ_SIZE_T)buf_line_stride + (x - x0) * buf_col_stride;
+                    if (x_incr == 4) {
+                        /* Same code as general branch, but the compiler */
+                        /* can have an efficient memcpy() */
+                        for (j = 0; j < y_incr; j++) {
+                            memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr);
+                            dest_ptr += block_width;
+                            src_ptr += buf_line_stride;
+                        }
+                    } else {
+                        for (j = 0; j < y_incr; j++) {
+                            memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr);
+                            dest_ptr += block_width;
+                            src_ptr += buf_line_stride;
+                        }
+                    }
+                } else {
+                    OPJ_INT32* OPJ_RESTRICT dest_ptr = src_block + block_y_offset *
+                                                       (OPJ_SIZE_T)block_width + block_x_offset;
+                    const OPJ_INT32* OPJ_RESTRICT src_ptr = buf + (y - y0) *
+                                                            (OPJ_SIZE_T)buf_line_stride + (x - x0) * buf_col_stride;
+                    if (x_incr == 1) {
+                        for (j = 0; j < y_incr; j++) {
+                            *dest_ptr = *src_ptr;
+                            src_ptr += buf_line_stride;
+                            dest_ptr += block_width;
+                        }
+                    } else if (x_incr >= 8 && buf_col_stride == 8) {
+                        for (j = 0; j < y_incr; j++) {
+                            OPJ_UINT32 k;
+                            for (k = 0; k < (x_incr & ~3U); k += 4) { /* unrolled by 4 */
+                                dest_ptr[k] = src_ptr[k * buf_col_stride];
+                                dest_ptr[k + 1] = src_ptr[(k + 1) * buf_col_stride];
+                                dest_ptr[k + 2] = src_ptr[(k + 2) * buf_col_stride];
+                                dest_ptr[k + 3] = src_ptr[(k + 3) * buf_col_stride];
+                            }
+                            for (; k < x_incr; k++) { /* remaining 0..3 samples */
+                                dest_ptr[k] = src_ptr[k * buf_col_stride];
+                            }
+                            src_ptr += buf_line_stride;
+                            dest_ptr += block_width;
+                        }
+                    } else {
+                        /* General case */
+                        for (j = 0; j < y_incr; j++) {
+                            OPJ_UINT32 k;
+                            for (k = 0; k < x_incr; k++) {
+                                dest_ptr[k] = src_ptr[k * buf_col_stride];
+                            }
+                            src_ptr += buf_line_stride;
+                            dest_ptr += block_width;
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    return OPJ_TRUE;
+}
+
+OPJ_BOOL opj_sparse_array_int32_read(const opj_sparse_array_int32_t* sa,
+                                     OPJ_UINT32 x0,
+                                     OPJ_UINT32 y0,
+                                     OPJ_UINT32 x1,
+                                     OPJ_UINT32 y1,
+                                     OPJ_INT32* dest,
+                                     OPJ_UINT32 dest_col_stride,
+                                     OPJ_UINT32 dest_line_stride,
+                                     OPJ_BOOL forgiving)
+{
+    return opj_sparse_array_int32_read_or_write( /* thin wrapper over the shared helper */
+               (opj_sparse_array_int32_t*)sa, x0, y0, x1, y1,
+               dest,
+               dest_col_stride,
+               dest_line_stride,
+               forgiving,
+               OPJ_TRUE); /* is_read_op: copy from the array into dest */
+}
+
+OPJ_BOOL opj_sparse_array_int32_write(opj_sparse_array_int32_t* sa,
+                                      OPJ_UINT32 x0,
+                                      OPJ_UINT32 y0,
+                                      OPJ_UINT32 x1,
+                                      OPJ_UINT32 y1,
+                                      const OPJ_INT32* src,
+                                      OPJ_UINT32 src_col_stride,
+                                      OPJ_UINT32 src_line_stride,
+                                      OPJ_BOOL forgiving)
+{
+    return opj_sparse_array_int32_read_or_write(sa, x0, y0, x1, y1,
+            (OPJ_INT32*)src, /* const dropped; the helper only reads buf when writing */
+            src_col_stride,
+            src_line_stride,
+            forgiving,
+            OPJ_FALSE); /* is_read_op: copy from src into the array */
+}
diff --git a/src/lib/openjp2/sparse_array.h b/src/lib/openjp2/sparse_array.h
new file mode 100644
index 000000000..130fe13ef
--- /dev/null
+++ b/src/lib/openjp2/sparse_array.h
@@ -0,0 +1,141 @@
+/*
+ * The copyright in this software is being made available under the 2-clauses
+ * BSD License, included below. This software may be subject to other third
+ * party and contributor rights, including patent rights, and no such rights
+ * are granted under this license.
+ *
+ * Copyright (c) 2017, IntoPix SA <contact@intopix.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "opj_includes.h"
+
+#ifndef OPJ_SPARSE_ARRAY_H
+#define OPJ_SPARSE_ARRAY_H
+/**
+@file sparse_array.h
+@brief Sparse array management
+
+The functions in this file manage sparse arrays. Sparse arrays are arrays with
+potentially big dimensions, but with very few samples actually set. Such sparse
+arrays require allocating a low amount of memory, by just allocating memory
+for blocks of the array that are set. The minimum memory allocation unit is
+a block. There is a trade-off in picking an appropriate dimension for blocks.
+If it is too big, and pixels set are far from each other, too much memory will
+be used. If blocks are too small, the book-keeping costs of blocks will rise.
+*/
+
+/** @defgroup SPARSE_ARRAY SPARSE ARRAYS - Sparse arrays */
+/*@{*/
+
+/** Opaque type for sparse arrays that contain int32 values */
+typedef struct opj_sparse_array_int32 opj_sparse_array_int32_t;
+
+/** Creates a new sparse array.
+ * @param width total width of the array.
+ * @param height total height of the array
+ * @param block_width width of a block.
+ * @param block_height height of a block.
+ * @return a new sparse array instance, or NULL in case of failure.
+ */
+opj_sparse_array_int32_t* opj_sparse_array_int32_create(OPJ_UINT32 width,
+        OPJ_UINT32 height,
+        OPJ_UINT32 block_width,
+        OPJ_UINT32 block_height);
+
+/** Frees a sparse array.
+ * @param sa sparse array instance.
+ */
+void opj_sparse_array_int32_free(opj_sparse_array_int32_t* sa);
+
+/** Returns whether region bounds are valid (non empty and within array bounds)
+ * @param sa sparse array instance.
+ * @param x0 left x coordinate of the region.
+ * @param y0 top y coordinate of the region.
+ * @param x1 right x coordinate (not included) of the region. Must be greater than x0.
+ * @param y1 bottom y coordinate (not included) of the region. Must be greater than y0.
+ * @return OPJ_TRUE or OPJ_FALSE.
+ */
+OPJ_BOOL opj_sparse_array_is_region_valid(const opj_sparse_array_int32_t* sa,
+        OPJ_UINT32 x0,
+        OPJ_UINT32 y0,
+        OPJ_UINT32 x1,
+        OPJ_UINT32 y1);
+
+/** Read the content of a rectangular region of the sparse array into a
+ * user buffer.
+ *
+ * Regions not written with opj_sparse_array_int32_write() are read as 0.
+ *
+ * @param sa sparse array instance.
+ * @param x0 left x coordinate of the region to read in the sparse array.
+ * @param y0 top y coordinate of the region to read in the sparse array.
+ * @param x1 right x coordinate (not included) of the region to read in the sparse array. Must be greater than x0.
+ * @param y1 bottom y coordinate (not included) of the region to read in the sparse array. Must be greater than y0.
+ * @param dest user buffer to fill. Must be at least sizeof(int32) * ( (y1 - y0 - 1) * dest_line_stride + (x1 - x0 - 1) * dest_col_stride + 1) bytes large.
+ * @param dest_col_stride spacing (in elements, not in bytes) in x dimension between consecutive elements of the user buffer.
+ * @param dest_line_stride spacing (in elements, not in bytes) in y dimension between consecutive elements of the user buffer.
+ * @param forgiving if set to TRUE and the region is invalid, OPJ_TRUE will still be returned.
+ * @return OPJ_TRUE in case of success.
+ */
+OPJ_BOOL opj_sparse_array_int32_read(const opj_sparse_array_int32_t* sa,
+                                     OPJ_UINT32 x0,
+                                     OPJ_UINT32 y0,
+                                     OPJ_UINT32 x1,
+                                     OPJ_UINT32 y1,
+                                     OPJ_INT32* dest,
+                                     OPJ_UINT32 dest_col_stride,
+                                     OPJ_UINT32 dest_line_stride,
+                                     OPJ_BOOL forgiving);
+
+
+/** Write the content of a rectangular region into the sparse array from a
+ * user buffer.
+ *
+ * Blocks intersecting the region are allocated, if not already done.
+ *
+ * @param sa sparse array instance.
+ * @param x0 left x coordinate of the region to write into the sparse array.
+ * @param y0 top y coordinate of the region to write into the sparse array.
+ * @param x1 right x coordinate (not included) of the region to write into the sparse array. Must be greater than x0.
+ * @param y1 bottom y coordinate (not included) of the region to write into the sparse array. Must be greater than y0.
+ * @param src user buffer to read from. Must be at least sizeof(int32) * ( (y1 - y0 - 1) * src_line_stride + (x1 - x0 - 1) * src_col_stride + 1) bytes large.
+ * @param src_col_stride spacing (in elements, not in bytes) in x dimension between consecutive elements of the user buffer.
+ * @param src_line_stride spacing (in elements, not in bytes) in y dimension between consecutive elements of the user buffer.
+ * @param forgiving if set to TRUE and the region is invalid, OPJ_TRUE will still be returned.
+ * @return OPJ_TRUE in case of success.
+ */
+OPJ_BOOL opj_sparse_array_int32_write(opj_sparse_array_int32_t* sa,
+                                      OPJ_UINT32 x0,
+                                      OPJ_UINT32 y0,
+                                      OPJ_UINT32 x1,
+                                      OPJ_UINT32 y1,
+                                      const OPJ_INT32* src,
+                                      OPJ_UINT32 src_col_stride,
+                                      OPJ_UINT32 src_line_stride,
+                                      OPJ_BOOL forgiving);
+
+/*@}*/
+
+#endif /* OPJ_SPARSE_ARRAY_H */
\ No newline at end of file
diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c
index 15e166f2d..a583e6920 100644
--- a/src/lib/openjp2/t1.c
+++ b/src/lib/openjp2/t1.c
@@ -38,7 +38,20 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+#define OPJ_SKIP_POISON
 #include "opj_includes.h"
+
+#ifdef __SSE__
+#include <xmmintrin.h>
+#endif
+#ifdef __SSE2__
+#include <emmintrin.h>
+#endif
+
+#if defined(__GNUC__)
+#pragma GCC poison malloc calloc realloc free
+#endif
+
 #include "t1_luts.h"
 
 /** @defgroup T1 T1 - Implementation of the tier-1 coding */
@@ -1426,44 +1439,27 @@ static OPJ_BOOL opj_t1_allocate_buffers(
     OPJ_UINT32 w,
     OPJ_UINT32 h)
 {
-    size_t flagssize;
+    OPJ_UINT32 flagssize;
     OPJ_UINT32 flags_stride;
 
+    /* No risk of overflow. Prior checks ensure those assert are met */
+    /* They are per the specification */
+    assert(w <= 1024);
+    assert(h <= 1024);
+    assert(w * h <= 4096);
+
     /* encoder uses tile buffer, so no need to allocate */
     if (!t1->encoder) {
-        size_t datasize;
+        OPJ_UINT32 datasize = w * h;
 
-#if (SIZE_MAX / 0xFFFFFFFFU) < 0xFFFFFFFFU /* UINT32_MAX */
-        /* Overflow check */
-        if ((w > 0U) && ((size_t)h > (SIZE_MAX / (size_t)w))) {
-            /* FIXME event manager error callback */
-            return OPJ_FALSE;
-        }
-#endif
-        datasize = (size_t)w * h;
-
-        /* Overflow check */
-        if (datasize > (SIZE_MAX / sizeof(OPJ_INT32))) {
-            /* FIXME event manager error callback */
-            return OPJ_FALSE;
-        }
-
-        if (datasize > (size_t)t1->datasize) {
+        if (datasize > t1->datasize) {
             opj_aligned_free(t1->data);
             t1->data = (OPJ_INT32*) opj_aligned_malloc(datasize * sizeof(OPJ_INT32));
             if (!t1->data) {
                 /* FIXME event manager error callback */
                 return OPJ_FALSE;
             }
-#if SIZE_MAX > 0xFFFFFFFFU /* UINT32_MAX */
-            /* TODO remove this if t1->datasize type changes to size_t */
-            /* Overflow check */
-            if (datasize > (size_t)0xFFFFFFFFU /* UINT32_MAX */) {
-                /* FIXME event manager error callback */
-                return OPJ_FALSE;
-            }
-#endif
-            t1->datasize = (OPJ_UINT32)datasize;
+            t1->datasize = datasize;
         }
         /* memset first arg is declared to never be null by gcc */
         if (t1->data != NULL) {
@@ -1471,40 +1467,18 @@ static OPJ_BOOL opj_t1_allocate_buffers(
         }
     }
 
-    /* Overflow check */
-    if (w > (0xFFFFFFFFU /* UINT32_MAX */ - 2U)) {
-        /* FIXME event manager error callback */
-        return OPJ_FALSE;
-    }
     flags_stride = w + 2U; /* can't be 0U */
 
-#if (SIZE_MAX - 3U) < 0xFFFFFFFFU /* UINT32_MAX */
-    /* Overflow check */
-    if (h > (0xFFFFFFFFU /* UINT32_MAX */ - 3U)) {
-        /* FIXME event manager error callback */
-        return OPJ_FALSE;
-    }
-#endif
     flagssize = (h + 3U) / 4U + 2U;
 
-    /* Overflow check */
-    if (flagssize > (SIZE_MAX / (size_t)flags_stride)) {
-        /* FIXME event manager error callback */
-        return OPJ_FALSE;
-    }
-    flagssize *= (size_t)flags_stride;
+    flagssize *= flags_stride;
     {
-        /* BIG FAT XXX */
         opj_flag_t* p;
         OPJ_UINT32 x;
         OPJ_UINT32 flags_height = (h + 3U) / 4U;
 
-        if (flagssize > (size_t)t1->flagssize) {
-            /* Overflow check */
-            if (flagssize > (SIZE_MAX / sizeof(opj_flag_t))) {
-                /* FIXME event manager error callback */
-                return OPJ_FALSE;
-            }
+        if (flagssize > t1->flagssize) {
+
             opj_aligned_free(t1->flags);
             t1->flags = (opj_flag_t*) opj_aligned_malloc(flagssize * sizeof(
                             opj_flag_t));
@@ -1512,16 +1486,8 @@ static OPJ_BOOL opj_t1_allocate_buffers(
                 /* FIXME event manager error callback */
                 return OPJ_FALSE;
             }
-#if SIZE_MAX > 0xFFFFFFFFU /* UINT32_MAX */
-            /* TODO remove this if t1->flagssize type changes to size_t */
-            /* Overflow check */
-            if (flagssize > (size_t)0xFFFFFFFFU /* UINT32_MAX */) {
-                /* FIXME event manager error callback */
-                return OPJ_FALSE;
-            }
-#endif
         }
-        t1->flagssize = (OPJ_UINT32)flagssize;
+        t1->flagssize = flagssize;
 
         memset(t1->flags, 0, flagssize * sizeof(opj_flag_t));
 
@@ -1610,6 +1576,7 @@ void opj_t1_destroy(opj_t1_t *p_t1)
 }
 
 typedef struct {
+    OPJ_BOOL whole_tile_decoding;
     OPJ_UINT32 resno;
     opj_tcd_cblk_dec_t* cblk;
     opj_tcd_band_t* band;
@@ -1643,12 +1610,43 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
     OPJ_UINT32 tile_w;
 
     job = (opj_t1_cblk_decode_processing_job_t*) user_data;
-    resno = job->resno;
+
     cblk = job->cblk;
+
+    if (!job->whole_tile_decoding) {
+        cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
+        cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);
+
+        cblk->decoded_data = opj_aligned_malloc(cblk_w * cblk_h * sizeof(OPJ_INT32));
+        if (cblk->decoded_data == NULL) {
+            if (job->p_manager_mutex) {
+                opj_mutex_lock(job->p_manager_mutex);
+            }
+            opj_event_msg(job->p_manager, EVT_ERROR,
+                          "Cannot allocate cblk->decoded_data\n");
+            if (job->p_manager_mutex) {
+                opj_mutex_unlock(job->p_manager_mutex);
+            }
+            *(job->pret) = OPJ_FALSE;
+            opj_free(job);
+            return;
+        }
+        /* Zero-init required */
+        memset(cblk->decoded_data, 0, cblk_w * cblk_h * sizeof(OPJ_INT32));
+    } else if (cblk->decoded_data) {
+        /* Not sure if that code path can happen, but better be */
+        /* safe than sorry */
+        opj_aligned_free(cblk->decoded_data);
+        cblk->decoded_data = NULL;
+    }
+
+    resno = job->resno;
     band = job->band;
     tilec = job->tilec;
     tccp = job->tccp;
-    tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
+    tile_w = (OPJ_UINT32)(tilec->resolutions[tilec->minimum_num_resolutions - 1].x1
+                          -
+                          tilec->resolutions[tilec->minimum_num_resolutions - 1].x0);
 
     if (!*(job->pret)) {
         opj_free(job);
@@ -1687,7 +1685,7 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
         y += pres->y1 - pres->y0;
     }
 
-    datap = t1->data;
+    datap = cblk->decoded_data ? cblk->decoded_data : t1->data;
     cblk_w = t1->w;
     cblk_h = t1->h;
 
@@ -1712,9 +1710,49 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
             }
         }
     }
-    if (tccp->qmfbid == 1) {
-        OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(OPJ_UINT32)y * tile_w +
-                                                       (OPJ_UINT32)x];
+
+    /* Both can be non NULL if for example decoding a full tile and then */
+    /* partially a tile. In which case partial decoding should be the */
+    /* priority */
+    assert((cblk->decoded_data != NULL) || (tilec->data != NULL));
+
+    if (cblk->decoded_data) {
+        OPJ_UINT32 cblk_size = cblk_w * cblk_h;
+        if (tccp->qmfbid == 1) {
+            for (i = 0; i < cblk_size; ++i) {
+                datap[i] /= 2;
+            }
+        } else {        /* if (tccp->qmfbid == 0) */
+            i = 0;
+#ifdef __SSE2__
+            {
+                const __m128 xmm_stepsize = _mm_set1_ps(band->stepsize);
+                for (; i < (cblk_size & ~15U); i += 16) {
+                    __m128 xmm0_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
+                                                           datap + 0)));
+                    __m128 xmm1_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
+                                                           datap + 4)));
+                    __m128 xmm2_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
+                                                           datap + 8)));
+                    __m128 xmm3_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
+                                                           datap + 12)));
+                    _mm_store_ps((float*)(datap +  0), _mm_mul_ps(xmm0_data, xmm_stepsize));
+                    _mm_store_ps((float*)(datap +  4), _mm_mul_ps(xmm1_data, xmm_stepsize));
+                    _mm_store_ps((float*)(datap +  8), _mm_mul_ps(xmm2_data, xmm_stepsize));
+                    _mm_store_ps((float*)(datap + 12), _mm_mul_ps(xmm3_data, xmm_stepsize));
+                    datap += 16;
+                }
+            }
+#endif
+            for (; i < cblk_size; ++i) {
+                OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * band->stepsize;
+                memcpy(datap, &tmp, sizeof(tmp));
+                datap++;
+            }
+        }
+    } else if (tccp->qmfbid == 1) {
+        OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w +
+                                                       (OPJ_SIZE_T)x];
         for (j = 0; j < cblk_h; ++j) {
             i = 0;
             for (; i < (cblk_w & ~(OPJ_UINT32)3U); i += 4U) {
@@ -1722,19 +1760,19 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
                 OPJ_INT32 tmp1 = datap[(j * cblk_w) + i + 1U];
                 OPJ_INT32 tmp2 = datap[(j * cblk_w) + i + 2U];
                 OPJ_INT32 tmp3 = datap[(j * cblk_w) + i + 3U];
-                ((OPJ_INT32*)tiledp)[(j * tile_w) + i + 0U] = tmp0 / 2;
-                ((OPJ_INT32*)tiledp)[(j * tile_w) + i + 1U] = tmp1 / 2;
-                ((OPJ_INT32*)tiledp)[(j * tile_w) + i + 2U] = tmp2 / 2;
-                ((OPJ_INT32*)tiledp)[(j * tile_w) + i + 3U] = tmp3 / 2;
+                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 0U] = tmp0 / 2;
+                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 1U] = tmp1 / 2;
+                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 2U] = tmp2 / 2;
+                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 3U] = tmp3 / 2;
             }
             for (; i < cblk_w; ++i) {
                 OPJ_INT32 tmp = datap[(j * cblk_w) + i];
-                ((OPJ_INT32*)tiledp)[(j * tile_w) + i] = tmp / 2;
+                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i] = tmp / 2;
             }
         }
     } else {        /* if (tccp->qmfbid == 0) */
-        OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_UINT32)y *
-                                                         tile_w + (OPJ_UINT32)x];
+        OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_SIZE_T)y *
+                                                         tile_w + (OPJ_SIZE_T)x];
         for (j = 0; j < cblk_h; ++j) {
             OPJ_FLOAT32* OPJ_RESTRICT tiledp2 = tiledp;
             for (i = 0; i < cblk_w; ++i) {
@@ -1763,6 +1801,11 @@ void opj_t1_decode_cblks(opj_tcd_t* tcd,
     opj_thread_pool_t* tp = tcd->thread_pool;
     OPJ_UINT32 resno, bandno, precno, cblkno;
 
+#ifdef DEBUG_VERBOSE
+    OPJ_UINT32 codeblocks_decoded = 0;
+    printf("Enter opj_t1_decode_cblks()\n");
+#endif
+
     for (resno = 0; resno < tilec->minimum_num_resolutions; ++resno) {
         opj_tcd_resolution_t* res = &tilec->resolutions[resno];
 
@@ -1771,7 +1814,6 @@ void opj_t1_decode_cblks(opj_tcd_t* tcd,
 
             for (precno = 0; precno < res->pw * res->ph; ++precno) {
                 opj_tcd_precinct_t* precinct = &band->precincts[precno];
-                OPJ_BOOL skip_precinct = OPJ_FALSE;
 
                 if (!opj_tcd_is_subband_area_of_interest(tcd,
                         tilec->compno,
@@ -1781,51 +1823,60 @@ void opj_t1_decode_cblks(opj_tcd_t* tcd,
                         (OPJ_UINT32)precinct->y0,
                         (OPJ_UINT32)precinct->x1,
                         (OPJ_UINT32)precinct->y1)) {
-                    skip_precinct = OPJ_TRUE;
-                    /* TODO: do a continue here once the below 0 initialization */
-                    /* of tiledp is removed */
+                    for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
+                        opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
+                        if (cblk->decoded_data) {
+#ifdef DEBUG_VERBOSE
+                            printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
+                                   cblk->x0, cblk->y0, resno, bandno);
+#endif
+                            opj_aligned_free(cblk->decoded_data);
+                            cblk->decoded_data = NULL;
+                        }
+                    }
+                    continue;
                 }
 
                 for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
                     opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
                     opj_t1_cblk_decode_processing_job_t* job;
 
-                    if (skip_precinct ||
-                            !opj_tcd_is_subband_area_of_interest(tcd,
-                                    tilec->compno,
-                                    resno,
-                                    band->bandno,
-                                    (OPJ_UINT32)cblk->x0,
-                                    (OPJ_UINT32)cblk->y0,
-                                    (OPJ_UINT32)cblk->x1,
-                                    (OPJ_UINT32)cblk->y1)) {
-
-                        /* TODO: remove this once we don't iterate over */
-                        /* tile pixels that are not in the subwindow of interest */
-                        OPJ_UINT32 j;
-                        OPJ_INT32 x = cblk->x0 - band->x0;
-                        OPJ_INT32 y = cblk->y0 - band->y0;
-                        OPJ_INT32* OPJ_RESTRICT tiledp;
-                        OPJ_UINT32 tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
+                    if (!opj_tcd_is_subband_area_of_interest(tcd,
+                            tilec->compno,
+                            resno,
+                            band->bandno,
+                            (OPJ_UINT32)cblk->x0,
+                            (OPJ_UINT32)cblk->y0,
+                            (OPJ_UINT32)cblk->x1,
+                            (OPJ_UINT32)cblk->y1)) {
+                        if (cblk->decoded_data) {
+#ifdef DEBUG_VERBOSE
+                            printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
+                                   cblk->x0, cblk->y0, resno, bandno);
+#endif
+                            opj_aligned_free(cblk->decoded_data);
+                            cblk->decoded_data = NULL;
+                        }
+                        continue;
+                    }
+
+                    if (!tcd->whole_tile_decoding) {
                         OPJ_UINT32 cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
                         OPJ_UINT32 cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);
-
-                        if (band->bandno & 1) {
-                            opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
-                            x += pres->x1 - pres->x0;
-                        }
-                        if (band->bandno & 2) {
-                            opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
-                            y += pres->y1 - pres->y0;
+                        if (cblk->decoded_data != NULL) {
+#ifdef DEBUG_VERBOSE
+                            printf("Reusing codeblock %d,%d at resno=%d, bandno=%d\n",
+                                   cblk->x0, cblk->y0, resno, bandno);
+#endif
+                            continue;
                         }
-
-                        tiledp = &tilec->data[(OPJ_UINT32)y * tile_w +
-                                                            (OPJ_UINT32)x];
-
-                        for (j = 0; j < cblk_h; ++j) {
-                            memset(tiledp + j * tile_w, 0, cblk_w * sizeof(OPJ_INT32));
+                        if (cblk_w == 0 || cblk_h == 0) {
+                            continue;
                         }
-                        continue;
+#ifdef DEBUG_VERBOSE
+                        printf("Decoding codeblock %d,%d at resno=%d, bandno=%d\n",
+                               cblk->x0, cblk->y0, resno, bandno);
+#endif
                     }
 
                     job = (opj_t1_cblk_decode_processing_job_t*) opj_calloc(1,
@@ -1834,6 +1885,7 @@ void opj_t1_decode_cblks(opj_tcd_t* tcd,
                         *pret = OPJ_FALSE;
                         return;
                     }
+                    job->whole_tile_decoding = tcd->whole_tile_decoding;
                     job->resno = resno;
                     job->cblk = cblk;
                     job->band = band;
@@ -1845,6 +1897,9 @@ void opj_t1_decode_cblks(opj_tcd_t* tcd,
                     job->check_pterm = check_pterm;
                     job->mustuse_cblkdatabuffer = opj_thread_pool_get_thread_count(tp) > 1;
                     opj_thread_pool_submit_job(tp, opj_t1_clbl_decode_processor, job);
+#ifdef DEBUG_VERBOSE
+                    codeblocks_decoded ++;
+#endif
                     if (!(*pret)) {
                         return;
                     }
@@ -1853,6 +1908,9 @@ void opj_t1_decode_cblks(opj_tcd_t* tcd,
         } /* bandno */
     } /* resno */
 
+#ifdef DEBUG_VERBOSE
+    printf("Leave opj_t1_decode_cblks(). Number decoded: %d\n", codeblocks_decoded);
+#endif
     return;
 }
 
@@ -1874,6 +1932,7 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
     OPJ_BYTE* cblkdata = NULL;
     OPJ_UINT32 cblkdataindex = 0;
     OPJ_BYTE type = T1_TYPE_MQ; /* BYPASS mode */
+    OPJ_INT32* original_t1_data = NULL;
 
     mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
 
@@ -1940,6 +1999,13 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
         cblkdata = cblk->chunks[0].data;
     }
 
+    /* For subtile decoding, directly decode in the decoded_data buffer of */
+    /* the code-block. Hack t1->data to point to it, and restore it later */
+    if (cblk->decoded_data) {
+        original_t1_data = t1->data;
+        t1->data = cblk->decoded_data;
+    }
+
     for (segno = 0; segno < cblk->real_num_segs; ++segno) {
         opj_tcd_seg_t *seg = &cblk->segs[segno];
 
@@ -2019,6 +2085,11 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
         }
     }
 
+    /* Restore original t1->data if needed */
+    if (cblk->decoded_data) {
+        t1->data = original_t1_data;
+    }
+
     return OPJ_TRUE;
 }
 
@@ -2062,7 +2133,8 @@ OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1,
                         OPJ_INT32* OPJ_RESTRICT tiledp;
                         OPJ_UINT32 cblk_w;
                         OPJ_UINT32 cblk_h;
-                        OPJ_UINT32 i, j, tileIndex = 0, tileLineAdvance;
+                        OPJ_UINT32 i, j, tileLineAdvance;
+                        OPJ_SIZE_T tileIndex = 0;
 
                         OPJ_INT32 x = cblk->x0 - band->x0;
                         OPJ_INT32 y = cblk->y0 - band->y0;
@@ -2086,7 +2158,7 @@ OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1,
                         cblk_h = t1->h;
                         tileLineAdvance = tile_w - cblk_w;
 
-                        tiledp = &tilec->data[(OPJ_UINT32)y * tile_w + (OPJ_UINT32)x];
+                        tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x];
                         t1->data = tiledp;
                         t1->data_stride = tile_w;
                         if (tccp->qmfbid == 1) {
diff --git a/src/lib/openjp2/tcd.c b/src/lib/openjp2/tcd.c
index 1c56c1b37..c652f8ba9 100644
--- a/src/lib/openjp2/tcd.c
+++ b/src/lib/openjp2/tcd.c
@@ -190,6 +190,10 @@ static OPJ_BOOL opj_tcd_rate_allocate_encode(opj_tcd_t *p_tcd,
         opj_codestream_info_t *p_cstr_info,
         opj_event_mgr_t *p_manager);
 
+
+static OPJ_BOOL opj_tcd_is_whole_tilecomp_decoding(opj_tcd_t *tcd,
+        OPJ_UINT32 compno);
+
 /* ----------------------------------------------------------------------- */
 
 /**
@@ -679,7 +683,7 @@ OPJ_BOOL opj_alloc_tile_component_data(opj_tcd_tilecomp_t *l_tilec)
             ((l_tilec->data_size_needed > l_tilec->data_size) &&
              (l_tilec->ownsData == OPJ_FALSE))) {
         l_tilec->data = (OPJ_INT32 *) opj_image_data_alloc(l_tilec->data_size_needed);
-        if (! l_tilec->data) {
+        if (!l_tilec->data && l_tilec->data_size_needed != 0) {
             return OPJ_FALSE;
         }
         /*fprintf(stderr, "tAllocate data of tilec (int): %d x OPJ_UINT32n",l_data_size);*/
@@ -794,22 +798,6 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
         l_tilec->compno = compno;
         /*fprintf(stderr, "\tTile compo border = %d,%d,%d,%d\n", l_tilec->x0, l_tilec->y0,l_tilec->x1,l_tilec->y1);*/
 
-        /* compute l_data_size with overflow check */
-        l_data_size = (OPJ_UINT32)(l_tilec->x1 - l_tilec->x0);
-        /* issue 733, l_data_size == 0U, probably something wrong should be checked before getting here */
-        if ((l_data_size > 0U) &&
-                ((((OPJ_UINT32) - 1) / l_data_size) < (OPJ_UINT32)(l_tilec->y1 -
-                        l_tilec->y0))) {
-            opj_event_msg(manager, EVT_ERROR, "Not enough memory for tile data\n");
-            return OPJ_FALSE;
-        }
-        l_data_size = l_data_size * (OPJ_UINT32)(l_tilec->y1 - l_tilec->y0);
-
-        if ((((OPJ_UINT32) - 1) / (OPJ_UINT32)sizeof(OPJ_UINT32)) < l_data_size) {
-            opj_event_msg(manager, EVT_ERROR, "Not enough memory for tile data\n");
-            return OPJ_FALSE;
-        }
-        l_data_size = l_data_size * (OPJ_UINT32)sizeof(OPJ_UINT32);
         l_tilec->numresolutions = l_tccp->numresolutions;
         if (l_tccp->numresolutions < l_cp->m_specific_param.m_dec.m_reduce) {
             l_tilec->minimum_num_resolutions = 1;
@@ -818,15 +806,39 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
                                                l_cp->m_specific_param.m_dec.m_reduce;
         }
 
-        l_tilec->data_size_needed = l_data_size;
-        if (p_tcd->m_is_decoder && !opj_alloc_tile_component_data(l_tilec)) {
-            opj_event_msg(manager, EVT_ERROR, "Not enough memory for tile data\n");
-            return OPJ_FALSE;
+        if (isEncoder) {
+            OPJ_SIZE_T l_tile_data_size;
+
+            /* compute l_tile_data_size with overflow check */
+            OPJ_SIZE_T w = (OPJ_SIZE_T)(l_tilec->x1 - l_tilec->x0);
+            OPJ_SIZE_T h = (OPJ_SIZE_T)(l_tilec->y1 - l_tilec->y0);
+
+            /* issue 733, l_tile_data_size == 0U, probably something wrong should be checked before getting here */
+            if (h > 0 && w > SIZE_MAX / h) {
+                opj_event_msg(manager, EVT_ERROR, "Size of tile data exceeds system limits\n");
+                return OPJ_FALSE;
+            }
+            l_tile_data_size = w * h;
+
+            if (SIZE_MAX / sizeof(OPJ_UINT32) < l_tile_data_size) {
+                opj_event_msg(manager, EVT_ERROR, "Size of tile data exceeds system limits\n");
+                return OPJ_FALSE;
+            }
+            l_tile_data_size = l_tile_data_size * sizeof(OPJ_UINT32);
+
+            l_tilec->data_size_needed = l_tile_data_size;
         }
 
         l_data_size = l_tilec->numresolutions * (OPJ_UINT32)sizeof(
                           opj_tcd_resolution_t);
 
+        opj_image_data_free(l_tilec->data_win);
+        l_tilec->data_win = NULL;
+        l_tilec->win_x0 = 0;
+        l_tilec->win_y0 = 0;
+        l_tilec->win_x1 = 0;
+        l_tilec->win_y1 = 0;
+
         if (l_tilec->resolutions == 00) {
             l_tilec->resolutions = (opj_tcd_resolution_t *) opj_malloc(l_data_size);
             if (! l_tilec->resolutions) {
@@ -875,6 +887,7 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
             l_res->y0 = opj_int_ceildivpow2(l_tilec->y0, (OPJ_INT32)l_level_no);
             l_res->x1 = opj_int_ceildivpow2(l_tilec->x1, (OPJ_INT32)l_level_no);
             l_res->y1 = opj_int_ceildivpow2(l_tilec->y1, (OPJ_INT32)l_level_no);
+
             /*fprintf(stderr, "\t\t\tres_x0= %d, res_y0 =%d, res_x1=%d, res_y1=%d\n", l_res->x0, l_res->y0, l_res->x1, l_res->y1);*/
             /* p. 35, table A-23, ISO/IEC FDIS154444-1 : 2000 (18 august 2000) */
             l_pdx = l_tccp->prcw[resno];
@@ -894,14 +907,14 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
             /*fprintf(stderr, "\t\t\tres_pw=%d, res_ph=%d\n", l_res->pw, l_res->ph );*/
 
             if ((l_res->pw != 0U) && ((((OPJ_UINT32) - 1) / l_res->pw) < l_res->ph)) {
-                opj_event_msg(manager, EVT_ERROR, "Not enough memory for tile data\n");
+                opj_event_msg(manager, EVT_ERROR, "Size of tile data exceeds system limits\n");
                 return OPJ_FALSE;
             }
             l_nb_precincts = l_res->pw * l_res->ph;
 
             if ((((OPJ_UINT32) - 1) / (OPJ_UINT32)sizeof(opj_tcd_precinct_t)) <
                     l_nb_precincts) {
-                opj_event_msg(manager, EVT_ERROR, "Not enough memory for tile data\n");
+                opj_event_msg(manager, EVT_ERROR, "Size of tile data exceeds system limits\n");
                 return OPJ_FALSE;
             }
             l_nb_precinct_size = l_nb_precincts * (OPJ_UINT32)sizeof(opj_tcd_precinct_t);
@@ -1249,6 +1262,9 @@ static OPJ_BOOL opj_tcd_code_block_dec_allocate(opj_tcd_cblk_dec_t *
         OPJ_UINT32 l_numchunksalloc = p_code_block->numchunksalloc;
         OPJ_UINT32 i;
 
+        opj_aligned_free(p_code_block->decoded_data);
+        p_code_block->decoded_data = 00;
+
         memset(p_code_block, 0, sizeof(opj_tcd_cblk_dec_t));
         p_code_block->segs = l_segs;
         p_code_block->m_current_max_segs = l_current_max_segs;
@@ -1262,7 +1278,8 @@ static OPJ_BOOL opj_tcd_code_block_dec_allocate(opj_tcd_cblk_dec_t *
     return OPJ_TRUE;
 }
 
-OPJ_UINT32 opj_tcd_get_decoded_tile_size(opj_tcd_t *p_tcd)
+OPJ_UINT32 opj_tcd_get_decoded_tile_size(opj_tcd_t *p_tcd,
+        OPJ_BOOL take_into_account_partial_decoding)
 {
     OPJ_UINT32 i;
     OPJ_UINT32 l_data_size = 0;
@@ -1276,6 +1293,7 @@ OPJ_UINT32 opj_tcd_get_decoded_tile_size(opj_tcd_t *p_tcd)
     l_img_comp = p_tcd->image->comps;
 
     for (i = 0; i < p_tcd->image->numcomps; ++i) {
+        OPJ_UINT32 w, h;
         l_size_comp = l_img_comp->prec >> 3; /*(/ 8)*/
         l_remaining = l_img_comp->prec & 7;  /* (%8) */
 
@@ -1288,8 +1306,17 @@ OPJ_UINT32 opj_tcd_get_decoded_tile_size(opj_tcd_t *p_tcd)
         }
 
         l_res = l_tile_comp->resolutions + l_tile_comp->minimum_num_resolutions - 1;
-        l_temp = (OPJ_UINT32)((l_res->x1 - l_res->x0) * (l_res->y1 -
-                              l_res->y0)); /* x1*y1 can't overflow */
+        if (take_into_account_partial_decoding && !p_tcd->whole_tile_decoding) {
+            w = l_res->win_x1 - l_res->win_x0;
+            h = l_res->win_y1 - l_res->win_y0;
+        } else {
+            w = (OPJ_UINT32)(l_res->x1 - l_res->x0);
+            h = (OPJ_UINT32)(l_res->y1 - l_res->y0);
+        }
+        if (w > 0 && UINT_MAX / w < h) { /* w == 0: product is 0, and no div by 0 */
+            return UINT_MAX; /* w * h would not fit in OPJ_UINT32 */
+        }
+        l_temp = w * h;
         if (l_size_comp && UINT_MAX / l_size_comp < l_temp) {
             return UINT_MAX;
         }
@@ -1339,7 +1366,8 @@ OPJ_BOOL opj_tcd_encode_tile(opj_tcd_t *p_tcd,
                 p_cstr_info->tile[p_tile_no].pdy[i] = (int)l_tccp->prch[i];
             }
             p_cstr_info->tile[p_tile_no].packet = (opj_packet_info_t*) opj_calloc((
-                    size_t)p_cstr_info->numcomps * (size_t)p_cstr_info->numlayers * l_num_packs,
+                    OPJ_SIZE_T)p_cstr_info->numcomps * (OPJ_SIZE_T)p_cstr_info->numlayers *
+                                                  l_num_packs,
                                                   sizeof(opj_packet_info_t));
             if (!p_cstr_info->tile[p_tile_no].packet) {
                 /* FIXME event manager error callback */
@@ -1401,10 +1429,10 @@ OPJ_BOOL opj_tcd_encode_tile(opj_tcd_t *p_tcd,
 }
 
 OPJ_BOOL opj_tcd_decode_tile(opj_tcd_t *p_tcd,
-                             OPJ_UINT32 decoded_x0,
-                             OPJ_UINT32 decoded_y0,
-                             OPJ_UINT32 decoded_x1,
-                             OPJ_UINT32 decoded_y1,
+                             OPJ_UINT32 win_x0,
+                             OPJ_UINT32 win_y0,
+                             OPJ_UINT32 win_x1,
+                             OPJ_UINT32 win_y1,
                              OPJ_BYTE *p_src,
                              OPJ_UINT32 p_max_length,
                              OPJ_UINT32 p_tile_no,
@@ -1413,12 +1441,93 @@ OPJ_BOOL opj_tcd_decode_tile(opj_tcd_t *p_tcd,
                             )
 {
     OPJ_UINT32 l_data_read;
+    OPJ_UINT32 compno;
+
     p_tcd->tcd_tileno = p_tile_no;
     p_tcd->tcp = &(p_tcd->cp->tcps[p_tile_no]);
-    p_tcd->decoded_x0 = decoded_x0;
-    p_tcd->decoded_y0 = decoded_y0;
-    p_tcd->decoded_x1 = decoded_x1;
-    p_tcd->decoded_y1 = decoded_y1;
+    p_tcd->win_x0 = win_x0;
+    p_tcd->win_y0 = win_y0;
+    p_tcd->win_x1 = win_x1;
+    p_tcd->win_y1 = win_y1;
+    p_tcd->whole_tile_decoding = OPJ_TRUE;
+
+    for (compno = 0; compno < p_tcd->image->numcomps; compno++) {
+        if (!opj_tcd_is_whole_tilecomp_decoding(p_tcd, compno)) {
+            p_tcd->whole_tile_decoding = OPJ_FALSE;
+            break;
+        }
+    }
+
+    if (p_tcd->whole_tile_decoding) {
+        for (compno = 0; compno < p_tcd->image->numcomps; compno++) {
+            opj_tcd_tilecomp_t* tilec = &(p_tcd->tcd_image->tiles->comps[compno]);
+            opj_tcd_resolution_t *l_res = &
+                                          (tilec->resolutions[tilec->minimum_num_resolutions - 1]);
+            OPJ_SIZE_T l_data_size;
+
+            /* compute l_data_size with overflow check */
+            OPJ_SIZE_T res_w = (OPJ_SIZE_T)(l_res->x1 - l_res->x0);
+            OPJ_SIZE_T res_h = (OPJ_SIZE_T)(l_res->y1 - l_res->y0);
+
+            /* issue 733, l_data_size == 0U, probably something wrong should be checked before getting here */
+            if (res_h > 0 && res_w > SIZE_MAX / res_h) {
+                opj_event_msg(p_manager, EVT_ERROR,
+                              "Size of tile data exceeds system limits\n");
+                return OPJ_FALSE;
+            }
+            l_data_size = res_w * res_h;
+
+            if (SIZE_MAX / sizeof(OPJ_UINT32) < l_data_size) {
+                opj_event_msg(p_manager, EVT_ERROR,
+                              "Size of tile data exceeds system limits\n");
+                return OPJ_FALSE;
+            }
+            l_data_size *= sizeof(OPJ_UINT32);
+
+            tilec->data_size_needed = l_data_size;
+
+            if (!opj_alloc_tile_component_data(tilec)) {
+                opj_event_msg(p_manager, EVT_ERROR,
+                              "Size of tile data exceeds system limits\n");
+                return OPJ_FALSE;
+            }
+        }
+    } else {
+        /* Compute restricted tile-component and tile-resolution coordinates */
+        /* of the window of interest, but defer the memory allocation until */
+        /* we know the resno_decoded */
+        for (compno = 0; compno < p_tcd->image->numcomps; compno++) {
+            OPJ_UINT32 resno;
+            opj_tcd_tilecomp_t* tilec = &(p_tcd->tcd_image->tiles->comps[compno]);
+            opj_image_comp_t* image_comp = &(p_tcd->image->comps[compno]);
+            /* Compute the intersection of the area of interest, expressed in tile coordinates */
+            /* with the tile coordinates */
+            tilec->win_x0 = opj_uint_max(
+                                (OPJ_UINT32)tilec->x0,
+                                opj_uint_ceildiv(p_tcd->win_x0, image_comp->dx));
+            tilec->win_y0 = opj_uint_max(
+                                (OPJ_UINT32)tilec->y0,
+                                opj_uint_ceildiv(p_tcd->win_y0, image_comp->dy));
+            tilec->win_x1 = opj_uint_min(
+                                (OPJ_UINT32)tilec->x1,
+                                opj_uint_ceildiv(p_tcd->win_x1, image_comp->dx));
+            tilec->win_y1 = opj_uint_min(
+                                (OPJ_UINT32)tilec->y1,
+                                opj_uint_ceildiv(p_tcd->win_y1, image_comp->dy));
+
+            for (resno = 0; resno < tilec->numresolutions; ++resno) {
+                opj_tcd_resolution_t *res = tilec->resolutions + resno;
+                res->win_x0 = opj_uint_ceildivpow2(tilec->win_x0,
+                                                   tilec->numresolutions - 1 - resno);
+                res->win_y0 = opj_uint_ceildivpow2(tilec->win_y0,
+                                                   tilec->numresolutions - 1 - resno);
+                res->win_x1 = opj_uint_ceildivpow2(tilec->win_x1,
+                                                   tilec->numresolutions - 1 - resno);
+                res->win_y1 = opj_uint_ceildivpow2(tilec->win_y1,
+                                                   tilec->numresolutions - 1 - resno);
+            }
+        }
+    }
 
 #ifdef TODO_MSD /* FIXME */
     /* INDEX >>  */
@@ -1461,6 +1570,45 @@ OPJ_BOOL opj_tcd_decode_tile(opj_tcd_t *p_tcd,
     }
     /* FIXME _ProfStop(PGROUP_T1); */
 
+
+    /* For subtile decoding, now we know the resno_decoded, we can allocate */
+    /* the tile data buffer */
+    if (!p_tcd->whole_tile_decoding) {
+        for (compno = 0; compno < p_tcd->image->numcomps; compno++) {
+            opj_tcd_tilecomp_t* tilec = &(p_tcd->tcd_image->tiles->comps[compno]);
+            opj_image_comp_t* image_comp = &(p_tcd->image->comps[compno]);
+            opj_tcd_resolution_t *res = tilec->resolutions + image_comp->resno_decoded;
+            OPJ_SIZE_T w = res->win_x1 - res->win_x0;
+            OPJ_SIZE_T h = res->win_y1 - res->win_y0;
+            OPJ_SIZE_T l_data_size;
+
+            opj_image_data_free(tilec->data_win);
+            tilec->data_win = NULL;
+
+            if (w > 0 && h > 0) {
+                if (w > SIZE_MAX / h) {
+                    opj_event_msg(p_manager, EVT_ERROR,
+                                  "Size of tile data exceeds system limits\n");
+                    return OPJ_FALSE;
+                }
+                l_data_size = w * h;
+                if (l_data_size > SIZE_MAX / sizeof(OPJ_INT32)) {
+                    opj_event_msg(p_manager, EVT_ERROR,
+                                  "Size of tile data exceeds system limits\n");
+                    return OPJ_FALSE;
+                }
+                l_data_size *= sizeof(OPJ_INT32);
+
+                tilec->data_win = opj_image_data_alloc(l_data_size);
+                if (tilec->data_win == NULL) {
+                    opj_event_msg(p_manager, EVT_ERROR,
+                                  "Size of tile data exceeds system limits\n");
+                    return OPJ_FALSE;
+                }
+            }
+        }
+    }
+
     /*----------------DWT---------------------*/
 
     /* FIXME _ProfStart(PGROUP_DWT); */
@@ -1502,7 +1650,7 @@ OPJ_BOOL opj_tcd_update_tile_data(opj_tcd_t *p_tcd,
     OPJ_UINT32 l_size_comp, l_remaining;
     OPJ_UINT32 l_stride, l_width, l_height;
 
-    l_data_size = opj_tcd_get_decoded_tile_size(p_tcd);
+    l_data_size = opj_tcd_get_decoded_tile_size(p_tcd, OPJ_TRUE);
     if (l_data_size == UINT_MAX || l_data_size > p_dest_length) {
         return OPJ_FALSE;
     }
@@ -1511,12 +1659,23 @@ OPJ_BOOL opj_tcd_update_tile_data(opj_tcd_t *p_tcd,
     l_img_comp = p_tcd->image->comps;
 
     for (i = 0; i < p_tcd->image->numcomps; ++i) {
+        const OPJ_INT32* l_src_data;
         l_size_comp = l_img_comp->prec >> 3; /*(/ 8)*/
         l_remaining = l_img_comp->prec & 7;  /* (%8) */
         l_res = l_tilec->resolutions + l_img_comp->resno_decoded;
-        l_width = (OPJ_UINT32)(l_res->x1 - l_res->x0);
-        l_height = (OPJ_UINT32)(l_res->y1 - l_res->y0);
-        l_stride = (OPJ_UINT32)(l_tilec->x1 - l_tilec->x0) - l_width;
+        if (p_tcd->whole_tile_decoding) {
+            l_width = (OPJ_UINT32)(l_res->x1 - l_res->x0);
+            l_height = (OPJ_UINT32)(l_res->y1 - l_res->y0);
+            l_stride = (OPJ_UINT32)(l_tilec->resolutions[l_tilec->minimum_num_resolutions -
+                                                                     1].x1 -
+                                    l_tilec->resolutions[l_tilec->minimum_num_resolutions - 1].x0) - l_width;
+            l_src_data = l_tilec->data;
+        } else {
+            l_width = l_res->win_x1 - l_res->win_x0;
+            l_height = l_res->win_y1 - l_res->win_y0;
+            l_stride = 0;
+            l_src_data = l_tilec->data_win;
+        }
 
         if (l_remaining) {
             ++l_size_comp;
@@ -1529,7 +1688,7 @@ OPJ_BOOL opj_tcd_update_tile_data(opj_tcd_t *p_tcd,
         switch (l_size_comp) {
         case 1: {
             OPJ_CHAR * l_dest_ptr = (OPJ_CHAR *) p_dest;
-            const OPJ_INT32 * l_src_ptr = l_tilec->data;
+            const OPJ_INT32 * l_src_ptr = l_src_data;
 
             if (l_img_comp->sgnd) {
                 for (j = 0; j < l_height; ++j) {
@@ -1551,7 +1710,7 @@ OPJ_BOOL opj_tcd_update_tile_data(opj_tcd_t *p_tcd,
         }
         break;
         case 2: {
-            const OPJ_INT32 * l_src_ptr = l_tilec->data;
+            const OPJ_INT32 * l_src_ptr = l_src_data;
             OPJ_INT16 * l_dest_ptr = (OPJ_INT16 *) p_dest;
 
             if (l_img_comp->sgnd) {
@@ -1579,7 +1738,7 @@ OPJ_BOOL opj_tcd_update_tile_data(opj_tcd_t *p_tcd,
         break;
         case 4: {
             OPJ_INT32 * l_dest_ptr = (OPJ_INT32 *) p_dest;
-            OPJ_INT32 * l_src_ptr = l_tilec->data;
+            const OPJ_INT32 * l_src_ptr = l_src_data;
 
             for (j = 0; j < l_height; ++j) {
                 memcpy(l_dest_ptr, l_src_ptr, l_width * sizeof(OPJ_INT32));
@@ -1674,6 +1833,9 @@ static void opj_tcd_free_tile(opj_tcd_t *p_tcd)
             l_tile_comp->data_size = 0;
             l_tile_comp->data_size_needed = 0;
         }
+
+        opj_image_data_free(l_tile_comp->data_win);
+
         ++l_tile_comp;
     }
 
@@ -1764,18 +1926,6 @@ static OPJ_BOOL opj_tcd_dwt_decode(opj_tcd_t *p_tcd)
     opj_image_comp_t * l_img_comp = p_tcd->image->comps;
 
     for (compno = 0; compno < l_tile->numcomps; compno++) {
-        /*
-        if (tcd->cp->reduce != 0) {
-                tcd->image->comps[compno].resno_decoded =
-                        tile->comps[compno].numresolutions - tcd->cp->reduce - 1;
-                if (tcd->image->comps[compno].resno_decoded < 0)
-                {
-                        return false;
-                }
-        }
-        numres2decode = tcd->image->comps[compno].resno_decoded + 1;
-        if(numres2decode > 0){
-        */
 
         if (l_tccp->qmfbid == 1) {
             if (! opj_dwt_decode(p_tcd, l_tile_comp,
@@ -1796,6 +1946,7 @@ static OPJ_BOOL opj_tcd_dwt_decode(opj_tcd_t *p_tcd)
 
     return OPJ_TRUE;
 }
+
 static OPJ_BOOL opj_tcd_mct_decode(opj_tcd_t *p_tcd, opj_event_mgr_t *p_manager)
 {
     opj_tcd_tile_t * l_tile = p_tcd->tcd_image->tiles;
@@ -1807,17 +1958,40 @@ static OPJ_BOOL opj_tcd_mct_decode(opj_tcd_t *p_tcd, opj_event_mgr_t *p_manager)
         return OPJ_TRUE;
     }
 
-    l_samples = (OPJ_UINT32)((l_tile_comp->x1 - l_tile_comp->x0) *
-                             (l_tile_comp->y1 - l_tile_comp->y0));
+    if (p_tcd->whole_tile_decoding) {
+        /* A bit inefficient: we process more data than needed if */
+        /* resno_decoded < l_tile_comp->minimum_num_resolutions-1, */
+        /* but we would need to take into account a stride then */
+        l_samples = (OPJ_UINT32)((
+                                     l_tile_comp->resolutions[l_tile_comp->minimum_num_resolutions - 1].x1 -
+                                     l_tile_comp->resolutions[l_tile_comp->minimum_num_resolutions - 1].x0) *
+                                 (l_tile_comp->resolutions[l_tile_comp->minimum_num_resolutions - 1].y1 -
+                                  l_tile_comp->resolutions[l_tile_comp->minimum_num_resolutions - 1].y0));
+    } else {
+        opj_tcd_resolution_t* l_res;
+        l_res = l_tile_comp->resolutions + p_tcd->image->comps[0].resno_decoded;
+        l_samples = (l_res->win_x1 - l_res->win_x0) *
+                    (l_res->win_y1 - l_res->win_y0);
+    }
 
     if (l_tile->numcomps >= 3) {
+        opj_tcd_resolution_t* res_comp0 = l_tile->comps[0].resolutions +
+                                          p_tcd->image->comps[0].resno_decoded;
+        opj_tcd_resolution_t* res_comp1 = l_tile->comps[1].resolutions +
+                                          p_tcd->image->comps[1].resno_decoded;
+        opj_tcd_resolution_t* res_comp2 = l_tile->comps[2].resolutions +
+                                          p_tcd->image->comps[2].resno_decoded;
+        OPJ_SIZE_T l_res_samples = (OPJ_SIZE_T)(res_comp0->x1 - res_comp0->x0) *
+                                   (OPJ_SIZE_T)(res_comp0->y1 - res_comp0->y0);
         /* testcase 1336.pdf.asan.47.376 */
-        if ((l_tile->comps[0].x1 - l_tile->comps[0].x0) * (l_tile->comps[0].y1 -
-                l_tile->comps[0].y0) < (OPJ_INT32)l_samples ||
-                (l_tile->comps[1].x1 - l_tile->comps[1].x0) * (l_tile->comps[1].y1 -
-                        l_tile->comps[1].y0) < (OPJ_INT32)l_samples ||
-                (l_tile->comps[2].x1 - l_tile->comps[2].x0) * (l_tile->comps[2].y1 -
-                        l_tile->comps[2].y0) < (OPJ_INT32)l_samples) {
+        if (p_tcd->image->comps[0].resno_decoded !=
+                p_tcd->image->comps[1].resno_decoded ||
+                p_tcd->image->comps[0].resno_decoded !=
+                p_tcd->image->comps[2].resno_decoded ||
+                (OPJ_SIZE_T)(res_comp1->x1 - res_comp1->x0) *
+                (OPJ_SIZE_T)(res_comp1->y1 - res_comp1->y0) != l_res_samples ||
+                (OPJ_SIZE_T)(res_comp2->x1 - res_comp2->x0) *
+                (OPJ_SIZE_T)(res_comp2->y1 - res_comp2->y0) != l_res_samples) {
             opj_event_msg(p_manager, EVT_ERROR,
                           "Tiles don't all have the same dimension. Skip the MCT step.\n");
             return OPJ_FALSE;
@@ -1834,7 +2008,11 @@ static OPJ_BOOL opj_tcd_mct_decode(opj_tcd_t *p_tcd, opj_event_mgr_t *p_manager)
             }
 
             for (i = 0; i < l_tile->numcomps; ++i) {
-                l_data[i] = (OPJ_BYTE*) l_tile_comp->data;
+                if (p_tcd->whole_tile_decoding) {
+                    l_data[i] = (OPJ_BYTE*) l_tile_comp->data;
+                } else {
+                    l_data[i] = (OPJ_BYTE*) l_tile_comp->data_win;
+                }
                 ++l_tile_comp;
             }
 
@@ -1855,15 +2033,29 @@ static OPJ_BOOL opj_tcd_mct_decode(opj_tcd_t *p_tcd, opj_event_mgr_t *p_manager)
             opj_free(l_data);
         } else {
             if (l_tcp->tccps->qmfbid == 1) {
-                opj_mct_decode(l_tile->comps[0].data,
-                               l_tile->comps[1].data,
-                               l_tile->comps[2].data,
-                               l_samples);
+                if (p_tcd->whole_tile_decoding) {
+                    opj_mct_decode(l_tile->comps[0].data,
+                                   l_tile->comps[1].data,
+                                   l_tile->comps[2].data,
+                                   l_samples);
+                } else {
+                    opj_mct_decode(l_tile->comps[0].data_win,
+                                   l_tile->comps[1].data_win,
+                                   l_tile->comps[2].data_win,
+                                   l_samples);
+                }
             } else {
-                opj_mct_decode_real((OPJ_FLOAT32*)l_tile->comps[0].data,
-                                    (OPJ_FLOAT32*)l_tile->comps[1].data,
-                                    (OPJ_FLOAT32*)l_tile->comps[2].data,
-                                    l_samples);
+                if (p_tcd->whole_tile_decoding) {
+                    opj_mct_decode_real((OPJ_FLOAT32*)l_tile->comps[0].data,
+                                        (OPJ_FLOAT32*)l_tile->comps[1].data,
+                                        (OPJ_FLOAT32*)l_tile->comps[2].data,
+                                        l_samples);
+                } else {
+                    opj_mct_decode_real((OPJ_FLOAT32*)l_tile->comps[0].data_win,
+                                        (OPJ_FLOAT32*)l_tile->comps[1].data_win,
+                                        (OPJ_FLOAT32*)l_tile->comps[2].data_win,
+                                        l_samples);
+                }
             }
         }
     } else {
@@ -1896,12 +2088,24 @@ static OPJ_BOOL opj_tcd_dc_level_shift_decode(opj_tcd_t *p_tcd)
 
     for (compno = 0; compno < l_tile->numcomps; compno++) {
         l_res = l_tile_comp->resolutions + l_img_comp->resno_decoded;
-        l_width = (OPJ_UINT32)(l_res->x1 - l_res->x0);
-        l_height = (OPJ_UINT32)(l_res->y1 - l_res->y0);
-        l_stride = (OPJ_UINT32)(l_tile_comp->x1 - l_tile_comp->x0) - l_width;
 
-        assert(l_height == 0 ||
-               l_width + l_stride <= l_tile_comp->data_size / l_height); /*MUPDF*/
+        if (!p_tcd->whole_tile_decoding) {
+            l_width = l_res->win_x1 - l_res->win_x0;
+            l_height = l_res->win_y1 - l_res->win_y0;
+            l_stride = 0;
+            l_current_ptr = l_tile_comp->data_win;
+        } else {
+            l_width = (OPJ_UINT32)(l_res->x1 - l_res->x0);
+            l_height = (OPJ_UINT32)(l_res->y1 - l_res->y0);
+            l_stride = (OPJ_UINT32)(
+                           l_tile_comp->resolutions[l_tile_comp->minimum_num_resolutions - 1].x1 -
+                           l_tile_comp->resolutions[l_tile_comp->minimum_num_resolutions - 1].x0)
+                       - l_width;
+            l_current_ptr = l_tile_comp->data;
+
+            assert(l_height == 0 ||
+                   l_width + l_stride <= l_tile_comp->data_size / l_height); /*MUPDF*/
+        }
 
         if (l_img_comp->sgnd) {
             l_min = -(1 << (l_img_comp->prec - 1));
@@ -1911,11 +2115,11 @@ static OPJ_BOOL opj_tcd_dc_level_shift_decode(opj_tcd_t *p_tcd)
             l_max = (OPJ_INT32)((1U << l_img_comp->prec) - 1);
         }
 
-        l_current_ptr = l_tile_comp->data;
 
         if (l_tccp->qmfbid == 1) {
             for (j = 0; j < l_height; ++j) {
                 for (i = 0; i < l_width; ++i) {
+                    /* TODO: do addition on int64 ? */
                     *l_current_ptr = opj_int_clamp(*l_current_ptr + l_tccp->m_dc_level_shift, l_min,
                                                    l_max);
                     ++l_current_ptr;
@@ -1926,13 +2130,14 @@ static OPJ_BOOL opj_tcd_dc_level_shift_decode(opj_tcd_t *p_tcd)
             for (j = 0; j < l_height; ++j) {
                 for (i = 0; i < l_width; ++i) {
                     OPJ_FLOAT32 l_value = *((OPJ_FLOAT32 *) l_current_ptr);
-                    OPJ_INT32 l_value_int = (OPJ_INT32)opj_lrintf(l_value);
-                    if (l_value > INT_MAX ||
-                            (l_value_int > 0 && l_tccp->m_dc_level_shift > 0 &&
-                             l_value_int > INT_MAX - l_tccp->m_dc_level_shift)) {
+                    if (l_value > INT_MAX) {
                         *l_current_ptr = l_max;
+                    } else if (l_value < INT_MIN) {
+                        *l_current_ptr = l_min;
                     } else {
-                        *l_current_ptr = opj_int_clamp(
+                        /* Do addition on int64 to avoid overflows */
+                        OPJ_INT64 l_value_int = (OPJ_INT64)opj_lrintf(l_value);
+                        *l_current_ptr = (OPJ_INT32)opj_int64_clamp(
                                              l_value_int + l_tccp->m_dc_level_shift, l_min, l_max);
                     }
                     ++l_current_ptr;
@@ -1981,6 +2186,9 @@ static void opj_tcd_code_block_dec_deallocate(opj_tcd_precinct_t * p_precinct)
                 l_code_block->chunks = 00;
             }
 
+            opj_aligned_free(l_code_block->decoded_data);
+            l_code_block->decoded_data = NULL;
+
             ++l_code_block;
         }
 
@@ -2026,9 +2234,10 @@ static void opj_tcd_code_block_enc_deallocate(opj_tcd_precinct_t * p_precinct)
     }
 }
 
-OPJ_UINT32 opj_tcd_get_encoded_tile_size(opj_tcd_t *p_tcd)
+OPJ_SIZE_T opj_tcd_get_encoded_tile_size(opj_tcd_t *p_tcd)
 {
-    OPJ_UINT32 i, l_data_size = 0;
+    OPJ_UINT32 i;
+    OPJ_SIZE_T l_data_size = 0;
     opj_image_comp_t * l_img_comp = 00;
     opj_tcd_tilecomp_t * l_tilec = 00;
     OPJ_UINT32 l_size_comp, l_remaining;
@@ -2047,8 +2256,8 @@ OPJ_UINT32 opj_tcd_get_encoded_tile_size(opj_tcd_t *p_tcd)
             l_size_comp = 4;
         }
 
-        l_data_size += l_size_comp * (OPJ_UINT32)((l_tilec->x1 - l_tilec->x0) *
-                       (l_tilec->y1 - l_tilec->y0));
+        l_data_size += l_size_comp * ((OPJ_SIZE_T)(l_tilec->x1 - l_tilec->x0) *
+                                      (OPJ_SIZE_T)(l_tilec->y1 - l_tilec->y0));
         ++l_img_comp;
         ++l_tilec;
     }
@@ -2063,7 +2272,7 @@ static OPJ_BOOL opj_tcd_dc_level_shift_encode(opj_tcd_t *p_tcd)
     opj_tccp_t * l_tccp = 00;
     opj_image_comp_t * l_img_comp = 00;
     opj_tcd_tile_t * l_tile;
-    OPJ_UINT32 l_nb_elem, i;
+    OPJ_SIZE_T l_nb_elem, i;
     OPJ_INT32 * l_current_ptr;
 
     l_tile = p_tcd->tcd_image->tiles;
@@ -2073,8 +2282,8 @@ static OPJ_BOOL opj_tcd_dc_level_shift_encode(opj_tcd_t *p_tcd)
 
     for (compno = 0; compno < l_tile->numcomps; compno++) {
         l_current_ptr = l_tile_comp->data;
-        l_nb_elem = (OPJ_UINT32)((l_tile_comp->x1 - l_tile_comp->x0) *
-                                 (l_tile_comp->y1 - l_tile_comp->y0));
+        l_nb_elem = (OPJ_SIZE_T)(l_tile_comp->x1 - l_tile_comp->x0) *
+                    (OPJ_SIZE_T)(l_tile_comp->y1 - l_tile_comp->y0);
 
         if (l_tccp->qmfbid == 1) {
             for (i = 0; i < l_nb_elem; ++i) {
@@ -2100,8 +2309,8 @@ static OPJ_BOOL opj_tcd_mct_encode(opj_tcd_t *p_tcd)
 {
     opj_tcd_tile_t * l_tile = p_tcd->tcd_image->tiles;
     opj_tcd_tilecomp_t * l_tile_comp = p_tcd->tcd_image->tiles->comps;
-    OPJ_UINT32 samples = (OPJ_UINT32)((l_tile_comp->x1 - l_tile_comp->x0) *
-                                      (l_tile_comp->y1 - l_tile_comp->y0));
+    OPJ_SIZE_T samples = (OPJ_SIZE_T)(l_tile_comp->x1 - l_tile_comp->x0) *
+                         (OPJ_SIZE_T)(l_tile_comp->y1 - l_tile_comp->y0);
     OPJ_UINT32 i;
     OPJ_BYTE ** l_data = 00;
     opj_tcp_t * l_tcp = p_tcd->tcp;
@@ -2283,13 +2492,15 @@ static OPJ_BOOL opj_tcd_rate_allocate_encode(opj_tcd_t *p_tcd,
 
 OPJ_BOOL opj_tcd_copy_tile_data(opj_tcd_t *p_tcd,
                                 OPJ_BYTE * p_src,
-                                OPJ_UINT32 p_src_length)
+                                OPJ_SIZE_T p_src_length)
 {
-    OPJ_UINT32 i, j, l_data_size = 0;
+    OPJ_UINT32 i;
+    OPJ_SIZE_T j;
+    OPJ_SIZE_T l_data_size = 0;
     opj_image_comp_t * l_img_comp = 00;
     opj_tcd_tilecomp_t * l_tilec = 00;
     OPJ_UINT32 l_size_comp, l_remaining;
-    OPJ_UINT32 l_nb_elem;
+    OPJ_SIZE_T l_nb_elem;
 
     l_data_size = opj_tcd_get_encoded_tile_size(p_tcd);
     if (l_data_size != p_src_length) {
@@ -2301,8 +2512,8 @@ OPJ_BOOL opj_tcd_copy_tile_data(opj_tcd_t *p_tcd,
     for (i = 0; i < p_tcd->image->numcomps; ++i) {
         l_size_comp = l_img_comp->prec >> 3; /*(/ 8)*/
         l_remaining = l_img_comp->prec & 7;  /* (%8) */
-        l_nb_elem = (OPJ_UINT32)((l_tilec->x1 - l_tilec->x0) * (l_tilec->y1 -
-                                 l_tilec->y0));
+        l_nb_elem = (OPJ_SIZE_T)(l_tilec->x1 - l_tilec->x0) *
+                    (OPJ_SIZE_T)(l_tilec->y1 - l_tilec->y0);
 
         if (l_remaining) {
             ++l_size_comp;
@@ -2396,16 +2607,16 @@ OPJ_BOOL opj_tcd_is_subband_area_of_interest(opj_tcd_t *tcd,
     /* with the tile coordinates */
     OPJ_UINT32 tcx0 = opj_uint_max(
                           (OPJ_UINT32)tilec->x0,
-                          opj_uint_ceildiv(tcd->decoded_x0, image_comp->dx));
+                          opj_uint_ceildiv(tcd->win_x0, image_comp->dx));
     OPJ_UINT32 tcy0 = opj_uint_max(
                           (OPJ_UINT32)tilec->y0,
-                          opj_uint_ceildiv(tcd->decoded_y0, image_comp->dy));
+                          opj_uint_ceildiv(tcd->win_y0, image_comp->dy));
     OPJ_UINT32 tcx1 = opj_uint_min(
                           (OPJ_UINT32)tilec->x1,
-                          opj_uint_ceildiv(tcd->decoded_x1, image_comp->dx));
+                          opj_uint_ceildiv(tcd->win_x1, image_comp->dx));
     OPJ_UINT32 tcy1 = opj_uint_min(
                           (OPJ_UINT32)tilec->y1,
-                          opj_uint_ceildiv(tcd->decoded_y1, image_comp->dy));
+                          opj_uint_ceildiv(tcd->win_y1, image_comp->dy));
     /* Compute number of decomposition for this band. See table F-1 */
     OPJ_UINT32 nb = (resno == 0) ?
                     tilec->numresolutions - 1 :
@@ -2452,3 +2663,44 @@ OPJ_BOOL opj_tcd_is_subband_area_of_interest(opj_tcd_t *tcd,
 #endif
     return intersects;
 }
+
+/** Returns whether a tile component is fully decoded, taking into account
+ * p_tcd->win_* members.
+ *
+ * @param p_tcd  TCD handle.
+ * @param compno Component number
+ * @return OPJ_TRUE if the tile component is fully decoded
+ */
+static OPJ_BOOL opj_tcd_is_whole_tilecomp_decoding(opj_tcd_t *p_tcd,
+        OPJ_UINT32 compno)
+{
+    opj_tcd_tilecomp_t* tilec = &(p_tcd->tcd_image->tiles->comps[compno]);
+    opj_image_comp_t* image_comp = &(p_tcd->image->comps[compno]);
+    /* Divide the window of interest by image_comp->dx/dy (opj_uint_ceildiv) and */
+    /* clamp the result to the tile-component bounds tilec->x0/y0/x1/y1 */
+    OPJ_UINT32 tcx0 = opj_uint_max(
+                          (OPJ_UINT32)tilec->x0,
+                          opj_uint_ceildiv(p_tcd->win_x0, image_comp->dx));
+    OPJ_UINT32 tcy0 = opj_uint_max(
+                          (OPJ_UINT32)tilec->y0,
+                          opj_uint_ceildiv(p_tcd->win_y0, image_comp->dy));
+    OPJ_UINT32 tcx1 = opj_uint_min(
+                          (OPJ_UINT32)tilec->x1,
+                          opj_uint_ceildiv(p_tcd->win_x1, image_comp->dx));
+    OPJ_UINT32 tcy1 = opj_uint_min(
+                          (OPJ_UINT32)tilec->y1,
+                          opj_uint_ceildiv(p_tcd->win_y1, image_comp->dy));
+
+    OPJ_UINT32 shift = tilec->numresolutions - tilec->minimum_num_resolutions;
+    /* Tolerate a margin smaller than 2^shift on each side of the tile component */
+    /* when deciding whether the whole-tile path must be taken */
+    return (tcx0 >= (OPJ_UINT32)tilec->x0 &&
+            tcy0 >= (OPJ_UINT32)tilec->y0 &&
+            tcx1 <= (OPJ_UINT32)tilec->x1 &&
+            tcy1 <= (OPJ_UINT32)tilec->y1 &&
+            (shift >= 32 || /* avoids an undefined shift by >= 32 bits */
+             (((tcx0 - (OPJ_UINT32)tilec->x0) >> shift) == 0 &&
+              ((tcy0 - (OPJ_UINT32)tilec->y0) >> shift) == 0 &&
+              (((OPJ_UINT32)tilec->x1 - tcx1) >> shift) == 0 &&
+              (((OPJ_UINT32)tilec->y1 - tcy1) >> shift) == 0)));
+}
diff --git a/src/lib/openjp2/tcd.h b/src/lib/openjp2/tcd.h
index bf3c457e8..63c22c457 100644
--- a/src/lib/openjp2/tcd.h
+++ b/src/lib/openjp2/tcd.h
@@ -134,6 +134,8 @@ typedef struct opj_tcd_cblk_dec {
     OPJ_UINT32 m_current_max_segs;  /* allocated number of segs[] items */
     OPJ_UINT32 numchunks;           /* Number of valid chunks items */
     OPJ_UINT32 numchunksalloc;      /* Number of chunks item allocated */
+    /* Decoded code-block. Only used for subtile decoding. Otherwise tilec->data is directly updated */
+    OPJ_INT32* decoded_data;
 } opj_tcd_cblk_dec_t;
 
 /** Precinct structure */
@@ -175,6 +177,12 @@ typedef struct opj_tcd_resolution {
     OPJ_UINT32 numbands;
     /* subband information */
     opj_tcd_band_t bands[3];
+
+    /* dimension of the resolution limited to window of interest. Only valid if tcd->whole_tile_decoding is NOT set */
+    OPJ_UINT32 win_x0;
+    OPJ_UINT32 win_y0;
+    OPJ_UINT32 win_x1;
+    OPJ_UINT32 win_y1;
 } opj_tcd_resolution_t;
 
 /** Tile-component structure */
@@ -191,14 +199,24 @@ typedef struct opj_tcd_tilecomp {
     opj_tcd_resolution_t *resolutions;
     /* size of data for resolutions (in bytes) */
     OPJ_UINT32 resolutions_size;
-    /* data of the component */
+
+    /* data of the component. For decoding, only valid if tcd->whole_tile_decoding is set (so exclusive of data_win member) */
     OPJ_INT32 *data;
     /* if true, then need to free after usage, otherwise do not free */
     OPJ_BOOL  ownsData;
     /* we may either need to allocate this amount of data, or re-use image data and ignore this value */
-    OPJ_UINT32 data_size_needed;
+    size_t data_size_needed;
     /* size of the data of the component */
-    OPJ_UINT32 data_size;
+    size_t data_size;
+
+    /** data of the component limited to window of interest. Only valid for decoding and if tcd->whole_tile_decoding is NOT set (so exclusive of data member) */
+    OPJ_INT32 *data_win;
+    /* dimension of the component limited to window of interest. Only valid for decoding and if tcd->whole_tile_decoding is NOT set */
+    OPJ_UINT32 win_x0;
+    OPJ_UINT32 win_y0;
+    OPJ_UINT32 win_x1;
+    OPJ_UINT32 win_y1;
+
     /* add fixed_quality */
     OPJ_INT32 numpix;
 } opj_tcd_tilecomp_t;
@@ -256,10 +274,12 @@ typedef struct opj_tcd {
     /** Thread pool */
     opj_thread_pool_t* thread_pool;
     /** Coordinates of the window of interest, in grid reference space */
-    OPJ_UINT32 decoded_x0;
-    OPJ_UINT32 decoded_y0;
-    OPJ_UINT32 decoded_x1;
-    OPJ_UINT32 decoded_y1;
+    OPJ_UINT32 win_x0;
+    OPJ_UINT32 win_y0;
+    OPJ_UINT32 win_x1;
+    OPJ_UINT32 win_y1;
+    /** Only valid for decoding. Whether the whole tile is decoded, or just the region in win_x0/win_y0/win_x1/win_y1 */
+    OPJ_BOOL   whole_tile_decoding;
 } opj_tcd_t;
 
 /** @name Exported functions */
@@ -331,7 +351,8 @@ OPJ_BOOL opj_tcd_rateallocate(opj_tcd_t *tcd,
 /**
  * Gets the maximum tile size that will be taken by the tile once decoded.
  */
-OPJ_UINT32 opj_tcd_get_decoded_tile_size(opj_tcd_t *p_tcd);
+OPJ_UINT32 opj_tcd_get_decoded_tile_size(opj_tcd_t *p_tcd,
+        OPJ_BOOL take_into_account_partial_decoding);
 
 /**
  * Encodes a tile from the raw image into the given buffer.
@@ -356,10 +377,10 @@ OPJ_BOOL opj_tcd_encode_tile(opj_tcd_t *p_tcd,
 /**
 Decode a tile from a buffer into a raw image
 @param tcd TCD handle
-@param decoded_x0 Upper left x of region to decode (in grid coordinates)
-@param decoded_y0 Upper left y of region to decode (in grid coordinates)
-@param decoded_x1 Lower right x of region to decode (in grid coordinates)
-@param decoded_y1 Lower right y of region to decode (in grid coordinates)
+@param win_x0 Upper left x of region to decode (in grid coordinates)
+@param win_y0 Upper left y of region to decode (in grid coordinates)
+@param win_x1 Lower right x of region to decode (in grid coordinates)
+@param win_y1 Lower right y of region to decode (in grid coordinates)
 @param src Source buffer
 @param len Length of source buffer
 @param tileno Number that identifies one of the tiles to be decoded
@@ -367,10 +388,10 @@ Decode a tile from a buffer into a raw image
 @param manager the event manager.
 */
 OPJ_BOOL opj_tcd_decode_tile(opj_tcd_t *tcd,
-                             OPJ_UINT32 decoded_x0,
-                             OPJ_UINT32 decoded_y0,
-                             OPJ_UINT32 decoded_x1,
-                             OPJ_UINT32 decoded_y1,
+                             OPJ_UINT32 win_x0,
+                             OPJ_UINT32 win_y0,
+                             OPJ_UINT32 win_x1,
+                             OPJ_UINT32 win_y1,
                              OPJ_BYTE *src,
                              OPJ_UINT32 len,
                              OPJ_UINT32 tileno,
@@ -388,7 +409,7 @@ OPJ_BOOL opj_tcd_update_tile_data(opj_tcd_t *p_tcd,
 /**
  *
  */
-OPJ_UINT32 opj_tcd_get_encoded_tile_size(opj_tcd_t *p_tcd);
+OPJ_SIZE_T opj_tcd_get_encoded_tile_size(opj_tcd_t *p_tcd);
 
 /**
  * Initialize the tile coder and may reuse some meory.
@@ -407,7 +428,7 @@ OPJ_BOOL opj_tcd_init_encode_tile(opj_tcd_t *p_tcd,
  */
 OPJ_BOOL opj_tcd_copy_tile_data(opj_tcd_t *p_tcd,
                                 OPJ_BYTE * p_src,
-                                OPJ_UINT32 p_src_length);
+                                OPJ_SIZE_T p_src_length);
 
 /**
  * Allocates tile component data
@@ -427,7 +448,7 @@ void opj_tcd_reinit_segment(opj_tcd_seg_t* seg);
 
 
 /** Returns whether a sub-band region contributes to the area of interest
- * tcd->decoded_x0,tcd->decoded_y0,tcd->decoded_x1,tcd->decoded_y1.
+ * tcd->win_x0,tcd->win_y0,tcd->win_x1,tcd->win_y1.
  *
  * @param tcd    TCD handle.
  * @param compno Component number
@@ -449,7 +470,6 @@ OPJ_BOOL opj_tcd_is_subband_area_of_interest(opj_tcd_t *tcd,
         OPJ_UINT32 x1,
         OPJ_UINT32 y1);
 
-
 /* ----------------------------------------------------------------------- */
 /*@}*/
 
diff --git a/src/lib/openjp2/test_sparse_array.c b/src/lib/openjp2/test_sparse_array.c
new file mode 100644
index 000000000..8e1364515
--- /dev/null
+++ b/src/lib/openjp2/test_sparse_array.c
@@ -0,0 +1,174 @@
+/*
+ * The copyright in this software is being made available under the 2-clauses
+ * BSD License, included below. This software may be subject to other third
+ * party and contributor rights, including patent rights, and no such rights
+ * are granted under this license.
+ *
+ * Copyright (c) 2017, IntoPix SA <contact@intopix.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#undef NDEBUG
+
+#include "opj_includes.h"
+
+int main(void)
+{
+    OPJ_UINT32 i, j, w, h;
+    OPJ_INT32 buffer[ 99 * 101 ];
+    OPJ_BOOL ret;
+    opj_sparse_array_int32_t* sa;
+    /* Sparse array self-test. Creation must fail if any size argument is 0 */
+    sa = opj_sparse_array_int32_create(0, 1, 1, 1);
+    assert(sa == NULL);
+    opj_sparse_array_int32_free(sa); /* freeing NULL must be harmless */
+
+    sa = opj_sparse_array_int32_create(1, 0, 1, 1);
+    assert(sa == NULL);
+
+    sa = opj_sparse_array_int32_create(1, 1, 0, 1);
+    assert(sa == NULL);
+
+    sa = opj_sparse_array_int32_create(1, 1, 1, 0);
+    assert(sa == NULL);
+    /* ... and also when the 3rd and 4th arguments are ~0U */
+    sa = opj_sparse_array_int32_create(99, 101, ~0U, ~0U);
+    assert(sa == NULL);
+    /* A 99x101 array can be created and freed without being used */
+    sa = opj_sparse_array_int32_create(99, 101, 15, 17);
+    opj_sparse_array_int32_free(sa);
+    /* Reads of empty, inverted or out-of-bounds regions must fail */
+    sa = opj_sparse_array_int32_create(99, 101, 15, 17);
+    ret = opj_sparse_array_int32_read(sa, 0, 0, 0, 1, buffer, 1, 1, OPJ_FALSE);
+    assert(!ret);
+    ret = opj_sparse_array_int32_read(sa, 0, 0, 1, 0, buffer, 1, 1, OPJ_FALSE);
+    assert(!ret);
+    ret = opj_sparse_array_int32_read(sa, 0, 0, 100, 1, buffer, 1, 1, OPJ_FALSE);
+    assert(!ret);
+    ret = opj_sparse_array_int32_read(sa, 0, 0, 1, 102, buffer, 1, 1, OPJ_FALSE);
+    assert(!ret);
+    ret = opj_sparse_array_int32_read(sa, 1, 0, 0, 1, buffer, 1, 1, OPJ_FALSE);
+    assert(!ret);
+    ret = opj_sparse_array_int32_read(sa, 0, 1, 1, 0, buffer, 1, 1, OPJ_FALSE);
+    assert(!ret);
+    ret = opj_sparse_array_int32_read(sa, 99, 101, 99, 101, buffer, 1, 1,
+                                      OPJ_FALSE);
+    assert(!ret);
+    /* Cells that were never written read back as 0 */
+    buffer[0] = 1;
+    ret = opj_sparse_array_int32_read(sa, 0, 0, 1, 1, buffer, 1, 1, OPJ_FALSE);
+    assert(ret);
+    assert(buffer[0] == 0);
+
+    memset(buffer, 0xFF, sizeof(buffer));
+    ret = opj_sparse_array_int32_read(sa, 0, 0, 99, 101, buffer, 1, 99, OPJ_FALSE);
+    assert(ret);
+    for (i = 0; i < 99 * 101; i++) {
+        assert(buffer[i] == 0);
+    }
+    /* Write 1 then 2 into the cell at (4,5); the last write wins */
+    buffer[0] = 1;
+    ret = opj_sparse_array_int32_write(sa, 4, 5, 4 + 1, 5 + 1, buffer, 1, 1,
+                                       OPJ_FALSE);
+    assert(ret);
+
+    buffer[0] = 2;
+    ret = opj_sparse_array_int32_write(sa, 4, 5, 4 + 1, 5 + 1, buffer, 1, 1,
+                                       OPJ_FALSE);
+    assert(ret);
+
+    buffer[0] = 0;
+    buffer[1] = 0xFF;
+    ret = opj_sparse_array_int32_read(sa, 4, 5, 4 + 1, 5 + 1, buffer, 1, 1,
+                                      OPJ_FALSE);
+    assert(ret);
+    assert(buffer[0] == 2);
+    assert(buffer[1] == 0xFF);
+    /* Read cells (4,5) and (4,6): the second was never written, buffer[2] is untouched */
+    buffer[0] = 0xFF;
+    buffer[1] = 0xFF;
+    buffer[2] = 0xFF;
+    ret = opj_sparse_array_int32_read(sa, 4, 5, 4 + 1, 5 + 2, buffer, 0, 1,
+                                      OPJ_FALSE);
+    assert(ret);
+    assert(buffer[0] == 2);
+    assert(buffer[1] == 0);
+    assert(buffer[2] == 0xFF);
+    /* Overwrite (4,5) with 3 and read it back */
+    buffer[0] = 3;
+    ret = opj_sparse_array_int32_write(sa, 4, 5, 4 + 1, 5 + 1, buffer, 0, 1,
+                                       OPJ_FALSE);
+    assert(ret);
+
+    buffer[0] = 0;
+    buffer[1] = 0xFF;
+    ret = opj_sparse_array_int32_read(sa, 4, 5, 4 + 1, 5 + 1, buffer, 1, 1,
+                                      OPJ_FALSE);
+    assert(ret);
+    assert(buffer[0] == 3);
+    assert(buffer[1] == 0xFF);
+    /* Read a 16x18 region starting at (2,1): only cell (4,5) is non-zero */
+    w = 15 + 1;
+    h = 17 + 1;
+    memset(buffer, 0xFF, sizeof(buffer));
+    ret = opj_sparse_array_int32_read(sa, 2, 1, 2 + w, 1 + h, buffer, 1, w,
+                                      OPJ_FALSE);
+    assert(ret);
+    for (j = 0; j < h; j++) {
+        for (i = 0; i < w; i++) {
+            if (i == 4 - 2 && j == 5 - 1) {
+                assert(buffer[ j * w + i ] == 3);
+            } else {
+                assert(buffer[ j * w + i ] == 0);
+            }
+        }
+    }
+
+    opj_sparse_array_int32_free(sa);
+
+    /* Fresh array: with a stride of 2 (7th argument), buffer[1] is never touched */
+    sa = opj_sparse_array_int32_create(99, 101, 15, 17);
+    memset(buffer, 0xFF, sizeof(buffer));
+    ret = opj_sparse_array_int32_read(sa, 0, 0, 2, 1, buffer, 2, 4, OPJ_FALSE);
+    assert(ret);
+    assert(buffer[0] == 0);
+    assert(buffer[1] == -1);
+    assert(buffer[2] == 0);
+
+    buffer[0] = 1;
+    buffer[2] = 3;
+    ret = opj_sparse_array_int32_write(sa, 0, 0, 2, 1, buffer, 2, 4, OPJ_FALSE);
+    assert(ret);
+
+    memset(buffer, 0xFF, sizeof(buffer));
+    ret = opj_sparse_array_int32_read(sa, 0, 0, 2, 1, buffer, 2, 4, OPJ_FALSE);
+    assert(ret);
+    assert(buffer[0] == 1);
+    assert(buffer[1] == -1);
+    assert(buffer[2] == 3);
+
+    opj_sparse_array_int32_free(sa);
+
+    return 0;
+}
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 579e066b8..95b752915 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -103,6 +103,10 @@ add_test(NAME tda_prep_irreversible_203_201_17_19_no_precinct COMMAND test_tile_
 add_test(NAME tda_irreversible_203_201_17_19_no_precinct COMMAND test_decode_area -q irreversible_203_201_17_19_no_precinct.j2k)
 set_property(TEST tda_irreversible_203_201_17_19_no_precinct APPEND PROPERTY DEPENDS tda_prep_irreversible_203_201_17_19_no_precinct)
 
+add_test(NAME tda_prep_strip COMMAND test_tile_encoder 1 256 256 256 256 8 0 tda_single_tile.j2k) # presumably writes tda_single_tile.j2k, the input of tda_strip
+add_test(NAME tda_strip COMMAND test_decode_area -q -strip_height 3 -strip_check tda_single_tile.j2k) # decode in 3-row strips, each checked against a full decode
+set_property(TEST tda_strip APPEND PROPERTY DEPENDS tda_prep_strip) # run tda_strip only after tda_prep_strip
+
 add_executable(include_openjpeg include_openjpeg.c)
 
 # No image send to the dashboard if lib PNG is not available.
diff --git a/tests/test_decode_area.c b/tests/test_decode_area.c
index e773bc667..59da86aeb 100644
--- a/tests/test_decode_area.c
+++ b/tests/test_decode_area.c
@@ -99,31 +99,13 @@ static void info_callback(const char *msg, void *client_data)
     /*fprintf(stdout, "[INFO] %s", msg);*/
 }
 
-opj_image_t* decode(
-    OPJ_BOOL quiet,
-    const char* input_file,
-    OPJ_INT32 x0,
-    OPJ_INT32 y0,
-    OPJ_INT32 x1,
-    OPJ_INT32 y1,
-    OPJ_UINT32* ptilew,
-    OPJ_UINT32* ptileh,
-    OPJ_UINT32* pcblkw,
-    OPJ_UINT32* pcblkh)
+static opj_codec_t* create_codec_and_stream(const char* input_file,
+        opj_stream_t** pOutStream)
 {
     opj_dparameters_t l_param;
     opj_codec_t * l_codec = NULL;
-    opj_image_t * l_image = NULL;
     opj_stream_t * l_stream = NULL;
 
-    if (!quiet) {
-        if (x0 != 0 || x1 != 0 || y0 != 0 || y1 != 0) {
-            printf("Decoding %d,%d,%d,%d\n", x0, y0, x1, y1);
-        } else {
-            printf("Decoding full image\n");
-        }
-    }
-
     l_stream = opj_stream_create_default_file_stream(input_file, OPJ_TRUE);
     if (!l_stream) {
         fprintf(stderr, "ERROR -> failed to create the stream from the file\n");
@@ -168,6 +150,40 @@ opj_image_t* decode(
         return NULL;
     }
 
+    *pOutStream = l_stream;
+    return l_codec;
+}
+
+
+opj_image_t* decode(
+    OPJ_BOOL quiet,
+    const char* input_file,
+    OPJ_INT32 x0,
+    OPJ_INT32 y0,
+    OPJ_INT32 x1,
+    OPJ_INT32 y1,
+    OPJ_UINT32* ptilew,
+    OPJ_UINT32* ptileh,
+    OPJ_UINT32* pcblkw,
+    OPJ_UINT32* pcblkh)
+{
+    opj_codec_t * l_codec = NULL;
+    opj_image_t * l_image = NULL;
+    opj_stream_t * l_stream = NULL;
+
+    if (!quiet) {
+        if (x0 != 0 || x1 != 0 || y0 != 0 || y1 != 0) {
+            printf("Decoding %d,%d,%d,%d\n", x0, y0, x1, y1);
+        } else {
+            printf("Decoding full image\n");
+        }
+    }
+
+    l_codec = create_codec_and_stream(input_file, &l_stream);
+    if (l_codec == NULL) {
+        return NULL;
+    }
+
     /* Read the main header of the codestream and if necessary the JP2 boxes*/
     if (! opj_read_header(l_stream, l_codec, &l_image)) {
         fprintf(stderr, "ERROR -> failed to read the header\n");
@@ -226,6 +242,160 @@ opj_image_t* decode(
     return l_image;
 }
 
+/**
+ * Decode input_file strip by strip, each strip being at most strip_height
+ * rows high, re-using a single codec and stream for every strip.
+ *
+ * The area to decode is (da_x0, da_y0, da_x1, da_y1), or the whole image
+ * when those four values are all zero. When full_image is not NULL, it must
+ * be a full decoding of the same file: each strip is compared against the
+ * matching rows of full_image. The comparison indexes samples in image
+ * coordinates, so it assumes components are not sub-sampled. When the image
+ * is smaller than 10000x10000, a final decoding of the whole image is also
+ * done with the same codec.
+ *
+ * Returns 0 on success, 1 on any error or pixel difference.
+ */
+int decode_by_strip(OPJ_BOOL quiet,
+                    const char* input_file,
+                    OPJ_UINT32 strip_height,
+                    OPJ_INT32 da_x0,
+                    OPJ_INT32 da_y0,
+                    OPJ_INT32 da_x1,
+                    OPJ_INT32 da_y1,
+                    opj_image_t* full_image)
+{
+    opj_codec_t * l_codec = NULL;
+    opj_image_t * l_image = NULL;
+    opj_stream_t * l_stream = NULL;
+    OPJ_UINT32 x0, y0, x1, y1, y;
+    OPJ_UINT32 full_x0, full_y0, full_x1, full_y1;
+    int ret = 1;
+
+    if (strip_height == 0) {
+        /* A null step would make the strip loop below spin forever */
+        fprintf(stderr, "ERROR -> strip height must be greater than 0\n");
+        return 1;
+    }
+
+    l_codec = create_codec_and_stream(input_file, &l_stream);
+    if (l_codec == NULL) {
+        return 1;
+    }
+
+    /* Read the main header of the codestream and if necessary the JP2 boxes*/
+    if (! opj_read_header(l_stream, l_codec, &l_image)) {
+        fprintf(stderr, "ERROR -> failed to read the header\n");
+        opj_stream_destroy(l_stream);
+        opj_destroy_codec(l_codec);
+        return 1;
+    }
+
+    /* Keep the full image bounds, read before any decode area is applied */
+    full_x0 = l_image->x0;
+    full_y0 = l_image->y0;
+    full_x1 = l_image->x1;
+    full_y1 = l_image->y1;
+
+    if (da_x0 != 0 || da_y0 != 0 || da_x1 != 0 || da_y1 != 0) {
+        x0 = (OPJ_UINT32)da_x0;
+        y0 = (OPJ_UINT32)da_y0;
+        x1 = (OPJ_UINT32)da_x1;
+        y1 = (OPJ_UINT32)da_y1;
+    } else {
+        x0 = full_x0;
+        y0 = full_y0;
+        x1 = full_x1;
+        y1 = full_y1;
+    }
+
+    if (full_image && (x0 < full_x0 || y0 < full_y0 ||
+                       x1 > full_x1 || y1 > full_y1)) {
+        /* The comparison below would otherwise read outside of full_image */
+        fprintf(stderr, "ERROR -> area to check is outside of the image\n");
+        goto cleanup;
+    }
+
+    y = y0;
+    while (y < y1) {
+        OPJ_UINT32 h_req = strip_height;
+        if (h_req > y1 - y) {
+            h_req = y1 - y;
+        }
+        if (!quiet) {
+            printf("Decoding %u...%u\n", y, y + h_req);
+        }
+        if (!opj_set_decode_area(l_codec, l_image, (OPJ_INT32)x0, (OPJ_INT32)y,
+                                 (OPJ_INT32)x1, (OPJ_INT32)(y + h_req))) {
+            fprintf(stderr, "ERROR -> failed to set the decoded area\n");
+            goto cleanup;
+        }
+
+        /* Get the decoded image */
+        if (!(opj_decode(l_codec, l_stream, l_image))) {
+            fprintf(stderr, "ERROR -> failed to decode image!\n");
+            goto cleanup;
+        }
+
+        if (full_image) {
+            const OPJ_UINT32 strip_w = x1 - x0;
+            const OPJ_UINT32 full_w = full_x1 - full_x0;
+            OPJ_UINT32 y_check, x;
+            OPJ_UINT32 compno;
+            for (compno = 0; compno < l_image->numcomps; compno ++) {
+                for (y_check = 0; y_check < h_req; y_check++) {
+                    for (x = x0; x < x1; x++) {
+                        OPJ_INT32 sub_image_val =
+                            l_image->comps[compno].data[y_check * strip_w + (x - x0)];
+                        /* full_image rows are full_w wide, origin (full_x0, full_y0) */
+                        OPJ_INT32 image_val =
+                            full_image->comps[compno].data[
+                                (y + y_check - full_y0) * full_w + (x - full_x0)];
+                        if (sub_image_val != image_val) {
+                            fprintf(stderr,
+                                    "Difference found at subimage pixel (%u,%u) "
+                                    "of compno=%u: got %d, expected %d\n",
+                                    x, y_check + y, compno, sub_image_val, image_val);
+                            goto cleanup;
+                        }
+                    }
+                }
+            }
+        }
+
+        y += h_req;
+    }
+
+    /* If image is small enough, try a final whole image read */
+    if (full_x1 - full_x0 < 10000 && full_y1 - full_y0 < 10000) {
+        if (!quiet) {
+            printf("Decoding full image\n");
+        }
+        if (!opj_set_decode_area(l_codec, l_image,
+                                 (OPJ_INT32)full_x0, (OPJ_INT32)full_y0,
+                                 (OPJ_INT32)full_x1, (OPJ_INT32)full_y1)) {
+            fprintf(stderr, "ERROR -> failed to set the decoded area\n");
+            goto cleanup;
+        }
+
+        /* Get the decoded image */
+        if (!(opj_decode(l_codec, l_stream, l_image))) {
+            fprintf(stderr, "ERROR -> failed to decode image!\n");
+            goto cleanup;
+        }
+    }
+
+    if (opj_end_decompress(l_codec, l_stream)) {
+        ret = 0;
+    }
+
+cleanup:
+    opj_stream_destroy(l_stream);
+    opj_destroy_codec(l_codec);
+    opj_image_destroy(l_image);
+    return ret;
+}
+
 OPJ_BOOL check_consistency(opj_image_t* p_image, opj_image_t* p_sub_image)
 {
     OPJ_UINT32 compno;
@@ -273,10 +423,13 @@ int main(int argc, char** argv)
     OPJ_UINT32 step_x, step_y;
     OPJ_BOOL quiet = OPJ_FALSE;
     OPJ_UINT32 nsteps = 100;
+    OPJ_UINT32 strip_height = 0;
+    OPJ_BOOL strip_check = OPJ_FALSE;
 
     if (argc < 2) {
         fprintf(stderr,
-                "Usage: test_decode_area [-q] [-steps n] input_file_jp2_or_jk2 [x0 y0 x1 y1]\n");
+                "Usage: test_decode_area [-q] [-steps n] input_file_jp2_or_jk2 [x0 y0 x1 y1]\n"
+                "or   : test_decode_area [-q] [-strip_height h] [-strip_check] input_file_jp2_or_jk2 [x0 y0 x1 y1]\n");
         return 1;
     }
 
@@ -288,6 +441,11 @@ int main(int argc, char** argv)
             } else if (strcmp(argv[iarg], "-steps") == 0 && iarg + 1 < argc) {
                 nsteps = (OPJ_UINT32)atoi(argv[iarg + 1]);
                 iarg ++;
+            } else if (strcmp(argv[iarg], "-strip_height") == 0 && iarg + 1 < argc) {
+                strip_height = (OPJ_UINT32)atoi(argv[iarg + 1]);
+                iarg ++;
+            } else if (strcmp(argv[iarg], "-strip_check") == 0) {
+                strip_check = OPJ_TRUE;
             } else if (input_file == NULL) {
                 input_file = argv[iarg];
             } else if (iarg + 3 < argc) {
@@ -295,15 +453,30 @@ int main(int argc, char** argv)
                 da_y0 = atoi(argv[iarg + 1]);
                 da_x1 = atoi(argv[iarg + 2]);
                 da_y1 = atoi(argv[iarg + 3]);
+                if (da_x0 < 0 || da_y0 < 0 || da_x1 < 0 || da_y1 < 0) {
+                    fprintf(stderr, "Wrong bounds\n");
+                    return 1;
+                }
                 iarg += 3;
             }
         }
     }
 
-    l_image = decode(quiet, input_file, 0, 0, 0, 0,
-                     &tilew, &tileh, &cblkw, &cblkh);
-    if (!l_image) {
-        return 1;
+    if (!strip_height || strip_check) {
+        l_image = decode(quiet, input_file, 0, 0, 0, 0,
+                         &tilew, &tileh, &cblkw, &cblkh);
+        if (!l_image) {
+            return 1;
+        }
+    }
+
+    if (strip_height) {
+        int ret = decode_by_strip(quiet, input_file, strip_height, da_x0, da_y0, da_x1,
+                                  da_y1, l_image);
+        if (l_image) {
+            opj_image_destroy(l_image);
+        }
+        return ret;
     }
 
     if (da_x0 != 0 || da_x1 != 0 || da_y0 != 0 || da_y1 != 0) {