Skip to content

Commit

Permalink
feat: require content-type parser to set content-type (#423)
Browse files Browse the repository at this point in the history
* adds `contentTypeParser` function to createVerifiedFetch options & implements it.
* renamed `getStreamAndContentType` to `getStreamFromAsyncIterable` that now returns a stream with the firstChunk seen, so we can pass it to the `contentTypeParser` function.
* updates tests in packages/verified-fetch & packages/interop
* updates packageDocumentation with example

Related #416
Fixes #422
---------

Co-authored-by: achingbrain <alex@achingbrain.net>
  • Loading branch information
SgtPooki and achingbrain authored Feb 8, 2024
1 parent 3851fe2 commit f58d467
Show file tree
Hide file tree
Showing 13 changed files with 311 additions and 134 deletions.
1 change: 1 addition & 0 deletions packages/interop/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@
"kubo": "^0.26.0",
"kubo-rpc-client": "^3.0.2",
"libp2p": "^1.2.1",
"magic-bytes.js": "^1.8.0",
"multiformats": "^13.0.1",
"p-defer": "^4.0.0",
"uint8arrays": "^5.0.1",
Expand Down
28 changes: 24 additions & 4 deletions packages/interop/src/verified-fetch-unixfs-dir.spec.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
/* eslint-env mocha */
import { createVerifiedFetch } from '@helia/verified-fetch'
import { expect } from 'aegir/chai'
import { filetypemime } from 'magic-bytes.js'
import { createKuboNode } from './fixtures/create-kubo.js'
import { loadFixtureDataCar } from './fixtures/load-fixture-data.js'
import type { VerifiedFetch } from '@helia/verified-fetch'
import type { Controller } from 'ipfsd-ctl'

describe('@helia/verified-fetch - unixfs directory', () => {
let controller: Controller
let verifiedFetch: Awaited<ReturnType<typeof createVerifiedFetch>>
let verifiedFetch: VerifiedFetch

before(async () => {
controller = await createKuboNode()
await controller.start()

verifiedFetch = await createVerifiedFetch({
gateways: [`http://${controller.api.gatewayHost}:${controller.api.gatewayPort}`],
// Temporarily disabling delegated routers in browser until CORS issue is fixed. see https://github.com/ipshipyard/waterworks-community/issues/4
Expand Down Expand Up @@ -42,19 +45,37 @@ describe('@helia/verified-fetch - unixfs directory', () => {
expect(resp).to.be.ok()
const text = await resp.text()
expect(text).to.equal('Don\'t we all.')
expect(resp.headers.get('content-type')).to.equal('text/plain')
})

it('can return an image for unixfs pathed data', async () => {
const resp = await verifiedFetch('ipfs://QmbQDovX7wRe9ek7u6QXe9zgCXkTzoUSsTFJEkrYV1HrVR/1 - Barrel - Part 1.png')
expect(resp).to.be.ok()
expect(resp.headers.get('content-type')).to.equal('image/png')
const imgData = await resp.blob()
expect(imgData).to.be.ok()
expect(imgData.size).to.equal(24848)
})
})

// Checks that a user-supplied `contentTypeParser` option determines the
// `content-type` header of the response.
describe('content type parser', () => {
before(async () => {
// stop the shared instance and replace it with one configured with a custom contentTypeParser
await verifiedFetch.stop()
verifiedFetch = await createVerifiedFetch({
gateways: [`http://${controller.api.gatewayHost}:${controller.api.gatewayPort}`],
// Temporarily disabling delegated routers in browser until CORS issue is fixed. see https://github.com/ipshipyard/waterworks-community/issues/4
routers: process.env.RUNNER_ENV === 'node' ? [`http://${controller.api.gatewayHost}:${controller.api.gatewayPort}`] : [],
contentTypeParser: (bytes) => {
// use the first entry returned by magic-bytes.js' `filetypemime`, if any;
// otherwise this yields `undefined`
return filetypemime(bytes)?.[0]
}
})
})

it('can return an image content-type for unixfs pathed data', async () => {
const resp = await verifiedFetch('ipfs://QmbQDovX7wRe9ek7u6QXe9zgCXkTzoUSsTFJEkrYV1HrVR/1 - Barrel - Part 1.png')
// tediously this is actually a jpeg file with a .png extension
expect(resp.headers.get('content-type')).to.equal('image/jpeg')
})
})

// TODO: find a smaller car file so the test doesn't timeout locally or flake on CI
describe.skip('HAMT-sharded directory', () => {
before(async () => {
Expand All @@ -65,7 +86,6 @@ describe('@helia/verified-fetch - unixfs directory', () => {
it('loads path /ipfs/bafybeidbclfqleg2uojchspzd4bob56dqetqjsj27gy2cq3klkkgxtpn4i/685.txt', async () => {
const resp = await verifiedFetch('ipfs://bafybeidbclfqleg2uojchspzd4bob56dqetqjsj27gy2cq3klkkgxtpn4i/685.txt')
expect(resp).to.be.ok()
expect(resp.headers.get('content-type')).to.equal('text/plain')
const text = await resp.text()
// npx kubo@0.25.0 cat '/ipfs/bafybeidbclfqleg2uojchspzd4bob56dqetqjsj27gy2cq3klkkgxtpn4i/685.txt'
expect(text).to.equal(`Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc non imperdiet nunc. Proin ac quam ut nibh eleifend aliquet. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia curae; Sed ligula dolor, imperdiet sagittis arcu et, semper tincidunt urna. Donec et tempor augue, quis sollicitudin metus. Curabitur semper ullamcorper aliquet. Mauris hendrerit sodales lectus eget fermentum. Proin sollicitudin vestibulum commodo. Vivamus nec lectus eu augue aliquet dignissim nec condimentum justo. In hac habitasse platea dictumst. Mauris vel sem neque.
Expand Down
8 changes: 8 additions & 0 deletions packages/verified-fetch/.aegir.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
/** @type {import('aegir').PartialOptions} */
const options = {
build: {
// upper limit for the built bundle size
// NOTE(review): presumably enforced by aegir's bundle size check - confirm against the aegir docs
bundlesizeMax: '132KB'
}
}

export default options
24 changes: 24 additions & 0 deletions packages/verified-fetch/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,30 @@ const resp = await fetch('ipfs://bafy...')
const json = await resp.json()
```

### Custom content-type parsing

By default, `@helia/verified-fetch` sets the `Content-Type` header as `application/octet-stream` - this is because the `.json()`, `.text()`, `.blob()`, and `.arrayBuffer()` methods will usually work as expected without a detailed content type.

If you require an accurate content-type you can provide a `contentTypeParser` function as an option to `createVerifiedFetch` to handle parsing the content type.

The function you provide will be called with the first chunk of bytes from the file and should return a string, or a promise that resolves to a string. If it returns or resolves to `undefined`, `application/octet-stream` will be used.

## Example - Customizing content-type parsing

```typescript
import { createVerifiedFetch } from '@helia/verified-fetch'
import { fileTypeFromBuffer } from '@sgtpooki/file-type'

const fetch = await createVerifiedFetch({
gateways: ['https://trustless-gateway.link'],
routers: ['http://delegated-ipfs.dev'],
contentTypeParser: async (bytes) => {
// call to some magic-byte recognition library like magic-bytes, file-type, or your own custom byte recognition
const result = await fileTypeFromBuffer(bytes)
return result?.mime
}
})
```

## Comparison to fetch

This module attempts to act as similarly to the `fetch()` API as possible.
Expand Down
7 changes: 4 additions & 3 deletions packages/verified-fetch/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -157,19 +157,20 @@
"@libp2p/peer-id": "^4.0.5",
"hashlru": "^2.3.0",
"ipfs-unixfs-exporter": "^13.5.0",
"mime-types": "^2.1.35",
"multiformats": "^13.0.1",
"progress-events": "^1.0.0"
},
"devDependencies": {
"@libp2p/logger": "^4.0.5",
"@libp2p/peer-id-factory": "^4.0.5",
"@types/mime-types": "^2.1.4",
"@sgtpooki/file-type": "^1.0.1",
"@types/sinon": "^17.0.3",
"aegir": "^42.2.2",
"helia": "^4.0.1",
"magic-bytes.js": "^1.8.0",
"sinon": "^17.0.1",
"sinon-ts": "^2.0.0"
"sinon-ts": "^2.0.0",
"uint8arrays": "^5.0.1"
},
"sideEffects": false
}
66 changes: 59 additions & 7 deletions packages/verified-fetch/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@
* const fetch = await createVerifiedFetch({
* gateways: ['https://trustless-gateway.link'],
* routers: ['http://delegated-ipfs.dev']
*})
* })
*
* const resp = await fetch('ipfs://bafy...')
*
Expand Down Expand Up @@ -112,6 +112,31 @@
* const json = await resp.json()
* ```
*
* ### Custom content-type parsing
*
* By default, `@helia/verified-fetch` sets the `Content-Type` header as `application/octet-stream` - this is because the `.json()`, `.text()`, `.blob()`, and `.arrayBuffer()` methods will usually work as expected without a detailed content type.
*
* If you require an accurate content-type you can provide a `contentTypeParser` function as an option to `createVerifiedFetch` to handle parsing the content type.
*
* The function you provide will be called with the first chunk of bytes from the file and should return a string, or a promise that resolves to a string. If it returns or resolves to `undefined`, `application/octet-stream` will be used.
*
* @example Customizing content-type parsing
*
* ```typescript
* import { createVerifiedFetch } from '@helia/verified-fetch'
* import { fileTypeFromBuffer } from '@sgtpooki/file-type'
*
* const fetch = await createVerifiedFetch({
* gateways: ['https://trustless-gateway.link'],
* routers: ['http://delegated-ipfs.dev'],
* contentTypeParser: async (bytes) => {
* // call to some magic-byte recognition library like magic-bytes, file-type, or your own custom byte recognition
* const result = await fileTypeFromBuffer(bytes)
* return result?.mime
* }
* })
* ```
*
* ## Comparison to fetch
*
* This module attempts to act as similarly to the `fetch()` API as possible.
Expand Down Expand Up @@ -257,11 +282,34 @@ export interface VerifiedFetch {
}

/**
* Instead of passing a Helia instance, you can pass a list of gateways and routers, and a HeliaHTTP instance will be created for you.
* Instead of passing a Helia instance, you can pass a list of gateways and
* routers, and a HeliaHTTP instance will be created for you.
*/
export interface CreateVerifiedFetchWithOptions {
export interface CreateVerifiedFetchOptions {
gateways: string[]
routers?: string[]

/**
* A function to handle parsing content type from bytes. The function you
* provide will be passed the first set of bytes we receive from the network,
* and should return a string that will be used as the value for the
* `Content-Type` header in the response.
*/
contentTypeParser?: ContentTypeParser
}

/**
* A ContentTypeParser attempts to return the mime type of a given file. It
* receives the first chunk of the file data and the file name, if it is
* available. The function can be sync or async and if it returns/resolves to
* `undefined`, `application/octet-stream` will be used.
*/
export interface ContentTypeParser {
/**
* Attempt to determine a mime type, either from the passed bytes or from the
* filename if it is available.
*
* @param bytes - the first chunk of the file data
* @param fileName - the name of the file, if it is available
* @returns the detected mime type, or `undefined` if it could not be
* determined; the value may be returned directly or via a promise
*/
(bytes: Uint8Array, fileName?: string): Promise<string | undefined> | string | undefined
}

export type BubbledProgressEvents =
Expand All @@ -280,17 +328,21 @@ export type VerifiedFetchProgressEvents =
/**
* Options for the `fetch` function returned by `createVerifiedFetch`.
*
* This method accepts all the same options as the `fetch` function in the browser, plus an `onProgress` option to
* listen for progress events.
* This interface contains all the same fields as the [options object](https://developer.mozilla.org/en-US/docs/Web/API/fetch#options)
* passed to `fetch` in browsers, plus an `onProgress` option to listen for
* progress events.
*/
export interface VerifiedFetchInit extends RequestInit, ProgressOptions<BubbledProgressEvents | VerifiedFetchProgressEvents> {
}

/**
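* Create and return a `VerifiedFetch` function. If a Helia node is not passed,
* a HeliaHTTP instance is created from the supplied options.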
*/
export async function createVerifiedFetch (init?: Helia | CreateVerifiedFetchWithOptions): Promise<VerifiedFetch> {
export async function createVerifiedFetch (init?: Helia | CreateVerifiedFetchOptions): Promise<VerifiedFetch> {
let contentTypeParser: ContentTypeParser | undefined

if (!isHelia(init)) {
contentTypeParser = init?.contentTypeParser
init = await createHeliaHTTP({
blockBrokers: [
trustlessGateway({
Expand All @@ -301,7 +353,7 @@ export async function createVerifiedFetch (init?: Helia | CreateVerifiedFetchWit
})
}

const verifiedFetchInstance = new VerifiedFetchClass({ helia: init })
const verifiedFetchInstance = new VerifiedFetchClass({ helia: init }, { contentTypeParser })
async function verifiedFetch (resource: Resource, options?: VerifiedFetchInit): Promise<Response> {
return verifiedFetchInstance.fetch(resource, options)
}
Expand Down
55 changes: 0 additions & 55 deletions packages/verified-fetch/src/utils/get-content-type.ts

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,27 +1,25 @@
import { CustomProgressEvent } from 'progress-events'
import { getContentType } from './get-content-type.js'
import type { VerifiedFetchInit } from '../index.js'
import type { ComponentLogger } from '@libp2p/interface'

/**
* Converts an async iterator of Uint8Array bytes to a stream and attempts to determine the content type of those bytes.
* Converts an async iterator of Uint8Array bytes to a stream and returns the first chunk of bytes
*/
export async function getStreamAndContentType (iterator: AsyncIterable<Uint8Array>, path: string, logger: ComponentLogger, options?: Pick<VerifiedFetchInit, 'onProgress'>): Promise<{ contentType: string, stream: ReadableStream<Uint8Array> }> {
const log = logger.forComponent('helia:verified-fetch:get-stream-and-content-type')
export async function getStreamFromAsyncIterable (iterator: AsyncIterable<Uint8Array>, path: string, logger: ComponentLogger, options?: Pick<VerifiedFetchInit, 'onProgress'>): Promise<{ stream: ReadableStream<Uint8Array>, firstChunk: Uint8Array }> {
const log = logger.forComponent('helia:verified-fetch:get-stream-from-async-iterable')
const reader = iterator[Symbol.asyncIterator]()
const { value, done } = await reader.next()
const { value: firstChunk, done } = await reader.next()

if (done === true) {
log.error('No content found for path', path)
throw new Error('No content found')
}

const contentType = await getContentType({ bytes: value, path })
const stream = new ReadableStream({
async start (controller) {
// the initial value is already available
options?.onProgress?.(new CustomProgressEvent<void>('verified-fetch:request:progress:chunk'))
controller.enqueue(value)
controller.enqueue(firstChunk)
},
async pull (controller) {
const { value, done } = await reader.next()
Expand All @@ -40,5 +38,8 @@ export async function getStreamAndContentType (iterator: AsyncIterable<Uint8Arra
}
})

return { contentType, stream }
return {
stream,
firstChunk
}
}
Loading

0 comments on commit f58d467

Please sign in to comment.