Skip to content

Commit

Permalink
Compressed resource optimization (#93)
Browse files Browse the repository at this point in the history
Co-authored-by: Mickaël Menu <mickael.menu@gmail.com>
  • Loading branch information
chocolatkey and mickael-menu authored Jul 3, 2024
1 parent 916550e commit cbc0d22
Show file tree
Hide file tree
Showing 11 changed files with 182 additions and 26 deletions.
13 changes: 0 additions & 13 deletions .eslintrc.js

This file was deleted.

15 changes: 13 additions & 2 deletions cmd/rwp/cmd/serve/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ import (
httprange "github.com/gotd/contrib/http_range"
"github.com/pkg/errors"
"github.com/readium/go-toolkit/cmd/rwp/cmd/serve/cache"
"github.com/readium/go-toolkit/pkg/archive"
"github.com/readium/go-toolkit/pkg/asset"
"github.com/readium/go-toolkit/pkg/fetcher"
"github.com/readium/go-toolkit/pkg/manifest"
"github.com/readium/go-toolkit/pkg/pub"
"github.com/readium/go-toolkit/pkg/streamer"
Expand Down Expand Up @@ -243,8 +245,17 @@ func (s *Server) getAsset(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusPartialContent)
}

// Stream the asset
_, rerr = res.Stream(w, start, end)
cres, ok := res.(fetcher.CompressedResource)
if ok && cres.CompressedAs(archive.CompressionMethodDeflate) && start == 0 && end == 0 && supportsDeflate(r) {
// Stream the asset in compressed format
w.Header().Set("content-encoding", "deflate")
w.Header().Set("content-length", strconv.FormatInt(cres.CompressedLength(), 10))
_, err = cres.StreamCompressed(w)
} else {
// Stream the asset
_, rerr = res.Stream(w, start, end)
}

if rerr != nil {
if errors.Is(err, syscall.EPIPE) || errors.Is(err, syscall.ECONNRESET) {
// Ignore client errors
Expand Down
26 changes: 26 additions & 0 deletions cmd/rwp/cmd/serve/helpers.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package serve

import (
"net/http"
"strings"

"github.com/readium/go-toolkit/pkg/manifest"
Expand Down Expand Up @@ -62,3 +63,28 @@ func conformsToAsMimetype(conformsTo manifest.Profiles) string {
}
return mime
}

// supportsDeflate reports whether the request's Accept-Encoding header lists
// the "deflate" content coding as acceptable.
//
// Every Accept-Encoding header line is inspected and each comma-separated
// entry is matched case-insensitively. An entry carrying a zero quality value
// (e.g. "deflate;q=0") explicitly marks the coding as NOT acceptable, so it is
// not counted as support. The "*" wildcard is deliberately not interpreted:
// only an explicit "deflate" entry counts.
func supportsDeflate(r *http.Request) bool {
	for _, line := range r.Header.Values("Accept-Encoding") {
		for _, entry := range strings.Split(line, ",") {
			if parseCoding(entry) == "deflate" && !hasZeroQuality(entry) {
				return true
			}
		}
	}
	return false
}

// parseCoding extracts the content-coding name from a single Accept-Encoding
// entry: the text before the first ';' (or the whole entry when there is no
// ';'), with surrounding whitespace removed and lowercased.
func parseCoding(s string) (coding string) {
	if p := strings.IndexRune(s, ';'); p != -1 {
		s = s[:p]
	}
	return strings.ToLower(strings.TrimSpace(s))
}

// hasZeroQuality reports whether an Accept-Encoding entry carries a "q"
// parameter whose value is zero ("0", "0.", "0.0", "0.00", "0.000").
// Entries without a "q" parameter, or with a malformed one, are not
// considered zero-weighted.
func hasZeroQuality(s string) bool {
	// strings.Split always yields at least one element (the coding itself),
	// so skipping the first one is safe.
	for _, param := range strings.Split(s, ";")[1:] {
		kv := strings.SplitN(param, "=", 2)
		if len(kv) != 2 || !strings.EqualFold(strings.TrimSpace(kv[0]), "q") {
			continue
		}
		q := strings.TrimSpace(kv[1])
		if !strings.HasPrefix(q, "0") {
			return false
		}
		frac := q[1:]
		if frac == "" {
			return true
		}
		if frac[0] != '.' {
			return false
		}
		// Zero only when every fractional digit is '0'.
		return strings.Trim(frac[1:], "0") == ""
	}
	return false
}
3 changes: 2 additions & 1 deletion pkg/archive/archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,10 @@ type Entry interface {
Path() string // Absolute path to the entry in the archive.
Length() uint64 // Uncompressed data length.
CompressedLength() uint64 // Compressed data length.
CompressedAs(compressionMethod CompressionMethod) bool // Whether the entry is compressed using the given method.
Read(start int64, end int64) ([]byte, error) // Reads the whole content of this entry, or a portion when [start] or [end] are specified.
Stream(w io.Writer, start int64, end int64) (int64, error) // Streams the whole content of this entry to a writer, or a portion when [start] or [end] are specified.
// Close()
StreamCompressed(w io.Writer) (int64, error) // Streams the compressed content of this entry to a writer.
}

// Represents an immutable archive.
Expand Down
8 changes: 8 additions & 0 deletions pkg/archive/archive_exploded.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ func (e explodedArchiveEntry) CompressedLength() uint64 {
return 0
}

// CompressedAs always reports false for this entry type, whichever
// compression method is asked about.
func (e explodedArchiveEntry) CompressedAs(compressionMethod CompressionMethod) bool {
return false
}

func (e explodedArchiveEntry) Read(start int64, end int64) ([]byte, error) {
if end < start {
return nil, errors.New("range not satisfiable")
Expand Down Expand Up @@ -82,6 +86,10 @@ func (e explodedArchiveEntry) Stream(w io.Writer, start int64, end int64) (int64
return n, nil
}

// StreamCompressed always fails for this entry type: it returns -1 together
// with an "entry is not compressed" error and never writes to w.
func (e explodedArchiveEntry) StreamCompressed(w io.Writer) (int64, error) {
return -1, errors.New("entry is not compressed")
}

// An archive exploded on the file system as a directory.
type explodedArchive struct {
directory string // Directory, already cleaned!
Expand Down
19 changes: 19 additions & 0 deletions pkg/archive/archive_zip.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,13 @@ func (e gozipArchiveEntry) CompressedLength() uint64 {
return e.file.CompressedSize64
}

// CompressedAs reports whether this ZIP entry is stored with the given
// compression method. Deflate is the only method recognised here: asking
// about any other method yields false regardless of the entry's own method.
func (e gozipArchiveEntry) CompressedAs(compressionMethod CompressionMethod) bool {
	return compressionMethod == CompressionMethodDeflate && e.file.Method == zip.Deflate
}

// This is a special mode to minimize the number of reads from the underlying reader.
// It's especially useful when trying to stream the ZIP from a remote file, e.g.
// cloud storage. It's only enabled when trying to read the entire file and compression
Expand Down Expand Up @@ -145,6 +152,18 @@ func (e gozipArchiveEntry) Stream(w io.Writer, start int64, end int64) (int64, e
return n, nil
}

// StreamCompressed copies the entry's raw (still deflated) bytes to w and
// returns the result of that copy. Entries whose method is not Deflate are
// rejected with -1 and a "not a compressed resource" error; a failure to open
// the raw content is reported with -1 and the underlying error.
func (e gozipArchiveEntry) StreamCompressed(w io.Writer) (int64, error) {
	if e.file.Method != zip.Deflate {
		return -1, errors.New("not a compressed resource")
	}

	raw, openErr := e.file.OpenRaw()
	if openErr != nil {
		return -1, openErr
	}

	written, copyErr := io.Copy(w, raw)
	return written, copyErr
}

// An archive from a zip file using go's stdlib
type gozipArchive struct {
zip *zip.Reader
Expand Down
10 changes: 10 additions & 0 deletions pkg/archive/compression.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package archive

import "archive/zip"

// CompressionMethod identifies how an archive entry's data is encoded. Its
// values are taken from the method identifiers of the standard archive/zip
// package.
type CompressionMethod uint16

// Compression methods exposed by this package.
const (
	// CompressionMethodStore has the value of zip.Store.
	CompressionMethodStore = CompressionMethod(zip.Store)
	// CompressionMethodDeflate has the value of zip.Deflate.
	CompressionMethodDeflate = CompressionMethod(zip.Deflate)
)
19 changes: 19 additions & 0 deletions pkg/fetcher/fetcher_archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,25 @@ func (r *entryResource) Stream(w io.Writer, start int64, end int64) (int64, *Res
return -1, Other(err)
}

// CompressedAs implements CompressedResource.
// The question is forwarded unchanged to the underlying archive entry.
func (r *entryResource) CompressedAs(compressionMethod archive.CompressionMethod) bool {
return r.entry.CompressedAs(compressionMethod)
}

// CompressedLength implements CompressedResource.
// It returns the underlying archive entry's compressed length, converted
// to int64.
func (r *entryResource) CompressedLength() int64 {
return int64(r.entry.CompressedLength())
}

// StreamCompressed implements CompressedResource.
// The underlying archive entry streams its compressed content to w. On
// success the entry's count is returned as-is; on failure the error is
// wrapped with Other and the count is -1.
func (r *entryResource) StreamCompressed(w io.Writer) (int64, *ResourceError) {
	written, err := r.entry.StreamCompressed(w)
	if err != nil {
		return -1, Other(err)
	}
	return written, nil
}

// Length implements Resource
func (r *entryResource) Length() (int64, *ResourceError) {
return int64(r.entry.Length()), nil
Expand Down
29 changes: 29 additions & 0 deletions pkg/fetcher/resource.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@ package fetcher
import (
"encoding/json"
"encoding/xml"
"errors"
"fmt"
"io"
"net/http"
"os"
"strings"

"github.com/readium/go-toolkit/pkg/archive"
"github.com/readium/go-toolkit/pkg/manifest"
"github.com/readium/xmlquery"
"golang.org/x/text/encoding/unicode"
Expand Down Expand Up @@ -365,6 +367,33 @@ func (r ProxyResource) ReadAsXML(prefixes map[string]string) (*xmlquery.Node, *R
return r.Res.ReadAsXML(prefixes)
}

// CompressedAs implements CompressedResource.
// The call is delegated to the wrapped resource when it is itself a
// CompressedResource; otherwise the answer is false.
func (r ProxyResource) CompressedAs(compressionMethod archive.CompressionMethod) bool {
	if inner, isCompressed := r.Res.(CompressedResource); isCompressed {
		return inner.CompressedAs(compressionMethod)
	}
	return false
}

// CompressedLength implements CompressedResource.
// The call is delegated to the wrapped resource when it is itself a
// CompressedResource; otherwise -1 is returned.
func (r ProxyResource) CompressedLength() int64 {
	if inner, isCompressed := r.Res.(CompressedResource); isCompressed {
		return inner.CompressedLength()
	}
	return -1
}

// StreamCompressed implements CompressedResource.
// The call is delegated to the wrapped resource when it is itself a
// CompressedResource; otherwise it fails with -1 and a
// "resource is not compressed" error wrapped with Other.
func (r ProxyResource) StreamCompressed(w io.Writer) (int64, *ResourceError) {
	if inner, isCompressed := r.Res.(CompressedResource); isCompressed {
		return inner.StreamCompressed(w)
	}
	return -1, Other(errors.New("resource is not compressed"))
}

/**
* Transforms the bytes of [resource] on-the-fly.
*
Expand Down
13 changes: 13 additions & 0 deletions pkg/fetcher/traits.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package fetcher

import (
"io"

"github.com/readium/go-toolkit/pkg/archive"
)

// CompressedResource describes a resource that can expose its content in
// compressed form, identified by an archive.CompressionMethod.
type CompressedResource interface {
// CompressedAs reports whether the resource is compressed using the given
// compression method.
CompressedAs(compressionMethod archive.CompressionMethod) bool
// CompressedLength returns the length of the compressed data.
// NOTE(review): implementations appear to return -1 when no compressed
// form is available — confirm before relying on it.
CompressedLength() int64
// StreamCompressed writes the compressed content to w. The int64 result is
// presumably the number of bytes written — verify against implementations.
StreamCompressed(w io.Writer) (int64, *ResourceError)
}
53 changes: 43 additions & 10 deletions pkg/parser/epub/deobfuscator.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"strings"

"github.com/pkg/errors"
"github.com/readium/go-toolkit/pkg/archive"
"github.com/readium/go-toolkit/pkg/fetcher"
)

Expand All @@ -32,15 +33,23 @@ type DeobfuscatingResource struct {
identifier string
}

func (d DeobfuscatingResource) Read(start, end int64) ([]byte, *fetcher.ResourceError) {
func (d DeobfuscatingResource) obfuscation() (string, int64) {
algorithm := ""
penc := d.Res.Link().Properties.Encryption()
if penc != nil {
algorithm = penc.Algorithm
}

v, ok := algorithm2length[algorithm]
if ok {
if !ok {
return algorithm, 0
}
return algorithm, v
}

func (d DeobfuscatingResource) Read(start, end int64) ([]byte, *fetcher.ResourceError) {
algorithm, v := d.obfuscation()
if v > 0 {
data, err := d.ProxyResource.Read(start, end)
if err != nil {
return nil, err
Expand All @@ -62,14 +71,8 @@ func (d DeobfuscatingResource) Read(start, end int64) ([]byte, *fetcher.Resource
}

func (d DeobfuscatingResource) Stream(w io.Writer, start int64, end int64) (int64, *fetcher.ResourceError) {
algorithm := ""
penc := d.Res.Link().Properties.Encryption()
if penc != nil {
algorithm = penc.Algorithm
}

v, ok := algorithm2length[algorithm]
if ok {
algorithm, v := d.obfuscation()
if v > 0 {
if start >= v {
// We're past the obfuscated part, just proxy it
return d.ProxyResource.Stream(w, start, end)
Expand Down Expand Up @@ -141,6 +144,36 @@ func (d DeobfuscatingResource) Stream(w io.Writer, start int64, end int64) (int6
return d.ProxyResource.Stream(w, start, end)
}

// CompressedAs implements CompressedResource.
// When obfuscation() reports a positive length for this resource, it is
// never advertised as compressed and false is returned. Otherwise the
// embedded ProxyResource answers.
func (d DeobfuscatingResource) CompressedAs(compressionMethod archive.CompressionMethod) bool {
	if _, obfuscatedLen := d.obfuscation(); obfuscatedLen > 0 {
		return false
	}
	return d.ProxyResource.CompressedAs(compressionMethod)
}

// CompressedLength implements CompressedResource.
// When obfuscation() reports a positive length for this resource, -1 is
// returned. Otherwise the embedded ProxyResource answers.
func (d DeobfuscatingResource) CompressedLength() int64 {
	if _, obfuscatedLen := d.obfuscation(); obfuscatedLen > 0 {
		return -1
	}
	return d.ProxyResource.CompressedLength()
}

// StreamCompressed implements CompressedResource.
// When obfuscation() reports a positive length for this resource, the call
// is refused with an error and nothing is written to w. The count returned
// alongside that error is -1, the same failure value ProxyResource.StreamCompressed
// uses, so callers see one consistent "no bytes, failed" result.
// Otherwise the embedded ProxyResource streams the content.
func (d DeobfuscatingResource) StreamCompressed(w io.Writer) (int64, *fetcher.ResourceError) {
	if _, v := d.obfuscation(); v > 0 {
		return -1, fetcher.Other(errors.New("cannot stream compressed resource when obfuscated"))
	}

	return d.ProxyResource.StreamCompressed(w)
}

func (d DeobfuscatingResource) getHashKeyAdobe() []byte {
hexbytes, _ := hex.DecodeString(
strings.Replace(
Expand Down

0 comments on commit cbc0d22

Please sign in to comment.