Skip to content

Commit

Permalink
Add HTTP retry actor and simple version of fetch
Browse files Browse the repository at this point in the history
  • Loading branch information
surilindur committed Sep 30, 2024
1 parent 673268d commit b1e1f94
Show file tree
Hide file tree
Showing 15 changed files with 1,129 additions and 0 deletions.
41 changes: 41 additions & 0 deletions packages/actor-http-fetch/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Comunica Fetch HTTP Actor

[![npm version](https://badge.fury.io/js/%40comunica%2Factor-http-fetch.svg)](https://www.npmjs.com/package/@comunica/actor-http-fetch)

An [HTTP](https://github.com/comunica/comunica/tree/master/packages/bus-http) actor that
uses [fetch](https://fetch.spec.whatwg.org/) to perform HTTP requests.

This module is part of the [Comunica framework](https://github.com/comunica/comunica),
and should only be used by [developers that want to build their own query engine](https://comunica.dev/docs/modify/).

When this actor is used, a custom fetch implementation may be provided via the context (`fetch`).
If none is provided, the global `fetch` implementation from the runtime or a polyfill is used.

[Click here if you just want to query with Comunica](https://comunica.dev/docs/query/).

## Install

```bash
$ yarn add @comunica/actor-http-fetch
```

## Configure

After installing, this package can be added to your engine's configuration as follows:
```json
{
"@context": [
"https://linkedsoftwaredependencies.org/bundles/npm/@comunica/actor-http-fetch/^3.0.0/components/context.jsonld"
],
"actors": [
{
"@id": "urn:comunica:default:http/actors#fetch",
"@type": "ActorHttpFetch"
}
]
}
```

### Config Parameters

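* `agentOptions`: JSON options passed to the Node.js HTTP and HTTPS agents that are used for requests, for example `{ "keepAlive": true, "maxSockets": 5 }` (the default). This option has no effect in browsers. _(optional)_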
164 changes: 164 additions & 0 deletions packages/actor-http-fetch/lib/ActorHttpFetch.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
import type { IActionHttp, IActorHttpOutput, IActorHttpArgs } from '@comunica/bus-http';
import { ActorHttp } from '@comunica/bus-http';
import { KeysHttp } from '@comunica/context-entries';
import type { TestResult } from '@comunica/core';
import { passTest } from '@comunica/core';
import type { IMediatorTypeTime } from '@comunica/mediatortype-time';

// eslint-disable-next-line import/extensions
import { version as actorVersion } from '../package.json';

import { FetchInitPreprocessor } from './FetchInitPreprocessor';
import type { IFetchInitPreprocessor } from './IFetchInitPreprocessor';

// Signature of a fetch-compatible function: either the runtime's global `fetch`
// or a custom implementation supplied through the context.
type Fetch = (input: RequestInfo | URL, init?: RequestInit) => Promise<Response>;

/**
 * An HTTP actor that performs requests through a fetch function:
 * the one found in the context under `KeysHttp.fetch`, or the global `fetch` otherwise.
 */
export class ActorHttpFetch extends ActorHttp {
  private readonly fetchInitPreprocessor: IFetchInitPreprocessor;

  private static readonly userAgent = ActorHttpFetch.createUserAgent('ActorHttpFetch', actorVersion);

  /**
   * @param {IActorHttpFetchArgs} args The actor arguments; `agentOptions` is handed to the fetch init preprocessor.
   */
  public constructor(args: IActorHttpFetchArgs) {
    super(args);
    this.fetchInitPreprocessor = new FetchInitPreprocessor(args.agentOptions);
  }

  /**
   * Always passes, reporting an infinite time estimate.
   * @param {IActionHttp} _action The HTTP action (unused).
   */
  public async test(_action: IActionHttp): Promise<TestResult<IMediatorTypeTime>> {
    return passTest({ time: Number.POSITIVE_INFINITY });
  }

  /**
   * Performs the HTTP request described by the action.
   *
   * The following context entries are taken into account:
   * - `KeysHttp.fetch`: custom fetch function to use instead of the global one.
   * - `KeysHttp.includeCredentials`: when truthy, `credentials` is set to `'include'`.
   * - `KeysHttp.httpTimeout`: number of milliseconds after which the request is aborted.
   * - `KeysHttp.httpBodyTimeout`: when truthy, the timeout is left running after the response
   *   headers have arrived, provided that the response has a body.
   *
   * @param {IActionHttp} action The HTTP action.
   * @returns {Promise<IActorHttpOutput>} The response produced by the fetch function.
   * @throws Whatever the fetch function rejects with, which includes the timeout error on abort.
   */
  public async run(action: IActionHttp): Promise<IActorHttpOutput> {
    const headers = this.prepareRequestHeaders(action);

    // Defaults to GET, but the method and any other option can be overridden through action.init
    const init: RequestInit = { method: 'GET', ...action.init, headers };

    this.logInfo(action.context, `Requesting ${ActorHttpFetch.getInputUrl(action.input).href}`, () => ({
      headers: ActorHttp.headersToHash(headers),
      method: init.method,
    }));

    // TODO: remove this workaround once this has a fix: https://github.com/inrupt/solid-client-authn-js/issues/1708
    if (action.context.has(KeysHttp.fetch)) {
      init.headers = ActorHttp.headersToHash(headers);
    }

    if (action.context.get(KeysHttp.includeCredentials)) {
      init.credentials = 'include';
    }

    const httpTimeout = action.context.get<number>(KeysHttp.httpTimeout);
    const httpBodyTimeout = action.context.get<boolean>(KeysHttp.httpBodyTimeout);
    const fetchFunction = action.context.get<Fetch>(KeysHttp.fetch) ?? fetch;
    const requestInit = await this.fetchInitPreprocessor.handle(init);

    let timeoutHandle: NodeJS.Timeout | undefined;

    if (httpTimeout) {
      const abortController = new AbortController();
      // NOTE(review): this replaces any signal that was supplied through action.init
      requestInit.signal = abortController.signal;
      timeoutHandle = setTimeout(
        () => abortController.abort(new Error(`Fetch timed out for ${ActorHttpFetch.getInputUrl(action.input).href} after ${httpTimeout} ms`)),
        httpTimeout,
      );
    }

    let response: Response;
    try {
      response = await fetchFunction(action.input, requestInit);
    } catch (error: unknown) {
      // The request has already failed, so the pending abort timer serves no purpose anymore.
      // Without clearing it, the timer would stay scheduled until the timeout elapses.
      clearTimeout(timeoutHandle);
      throw error;
    }

    // Keep the timer running only when the timeout should also cover the retrieval of an existing body
    if (httpTimeout && (!httpBodyTimeout || !response.body)) {
      clearTimeout(timeoutHandle);
    }

    return response;
  }

  /**
   * Prepares the request headers, taking into account the environment.
   * When the context contains `KeysHttp.auth`, the Authorization header is set to
   * the corresponding Basic authentication value, replacing any existing one.
   * @param {IActionHttp} action The HTTP action
   * @returns {Headers} Headers
   */
  public prepareRequestHeaders(action: IActionHttp): Headers {
    const headers = new Headers(action.init?.headers);

    if (ActorHttp.isBrowser()) {
      // When running in a browser, the User-Agent header should never be set
      headers.delete('user-agent');
    } else if (!headers.has('user-agent')) {
      // Otherwise, if no header value is provided, use the actor one
      headers.set('user-agent', ActorHttpFetch.userAgent!);
    }

    const authString = action.context.get<string>(KeysHttp.auth);
    if (authString) {
      headers.set('Authorization', `Basic ${Buffer.from(authString).toString('base64')}`);
    }

    return headers;
  }

  /**
   * Extract the requested URL from the action input.
   * @param {RequestInfo | URL} input The request input.
   * @returns {URL} The extracted URL.
   */
  public static getInputUrl(input: RequestInfo | URL): URL {
    return new URL(input instanceof Request ? input.url : input);
  }

  /**
   * Creates an appropriate User-Agent header string for Node.js or other environments.
   * Within browsers, returns undefined, because the value should not be overridden due to potential CORS issues.
   * @param {string} actorName The actor name to include in the header value.
   * @param {string} actorVersion The actor version, of which the major part is also used as Comunica version.
   * @returns {string | undefined} The header value, or undefined within browsers.
   */
  public static createUserAgent(actorName: string, actorVersion: string): string | undefined {
    if (ActorHttp.isBrowser()) {
      return undefined;
    }

    const versions = [
      `Comunica/${actorVersion.split('.')[0]}.0`,
      `${actorName}/${actorVersion}`,
    ];

    if (typeof globalThis.navigator === 'object' && typeof globalThis.navigator.userAgent === 'string') {
      // Most runtimes like Node.js 21+, Deno and Bun implement navigator.userAgent
      versions.push(globalThis.navigator.userAgent);
    } else if (
      typeof globalThis.process === 'object' &&
      typeof globalThis.process.versions === 'object' &&
      typeof globalThis.process.versions.node === 'string'
    ) {
      // TODO: remove this entire 'else if' when support for Node.js 20 is dropped, this only exists for that one
      versions.push(`Node.js/${globalThis.process.versions.node.split('.')[0]}`);
    }

    if (
      typeof globalThis.process === 'object' &&
      typeof globalThis.process.platform === 'string' &&
      typeof globalThis.process.arch === 'string'
    ) {
      // The platform information goes right after the Comunica version, before the actor version
      versions.splice(1, 0, `(${globalThis.process.platform}; ${globalThis.process.arch})`);
    }

    return versions.join(' ');
  }

  /**
   * Attempts to determine whether the current environment is a browser or not.
   * @returns {boolean} True for browsers and web workers, false for other runtimes.
   */
  public static isBrowser(): boolean {
    return (
      // The window global and the document are available in browsers, but not in web workers
      // https://developer.mozilla.org/en-US/docs/Glossary/Global_object
      (typeof globalThis.window === 'object' && typeof globalThis.window.document === 'object') ||
      // The importScripts function is only available in Web Workers
      // https://developer.mozilla.org/en-US/docs/Web/API/WorkerGlobalScope/importScripts
      (typeof (<any>globalThis).importScripts === 'function')
    );
  }
}

// Constructor arguments of ActorHttpFetch, on top of the generic HTTP actor arguments.
export interface IActorHttpFetchArgs extends IActorHttpArgs {
/**
* The agent options for the HTTP agent.
* The ActorHttpFetch constructor passes this value on to the FetchInitPreprocessor constructor.
* @range {json}
* @default {{ "keepAlive": true, "maxSockets": 5 }}
*/
agentOptions?: Record<string, any>;
}
30 changes: 30 additions & 0 deletions packages/actor-http-fetch/lib/FetchInitPreprocessor-browser.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import type { IFetchInitPreprocessor } from './IFetchInitPreprocessor';

/**
* Overrides things for fetch requests in browsers
*/
export class FetchInitPreprocessor implements IFetchInitPreprocessor {
  /**
   * Prepares a fetch init object for use in browsers.
   * A ReadableStream body is fully read and replaced by a string on the given init object,
   * and the returned copy of the init object has `keepalive` enabled only when there is no body.
   * @param {RequestInit} init The fetch init object; its `body` is overwritten when it is a stream.
   * @returns {Promise<RequestInit>} A shallow copy of the init object with `keepalive` set.
   */
  public async handle(init: RequestInit): Promise<RequestInit> {
    // Browsers don't yet support passing ReadableStream as body to requests, see
    // https://bugs.chromium.org/p/chromium/issues/detail?id=688906
    // https://bugzilla.mozilla.org/show_bug.cgi?id=1387483
    // As such, we convert those bodies to a plain string
    // TODO: remove this once browser support ReadableStream in requests
    if (init.body && typeof init.body !== 'string' && 'getReader' in init.body) {
      const reader = init.body.getReader();
      const decoder = new TextDecoder();
      const parts: string[] = [];

      for (let result = await reader.read(); !result.done; result = await reader.read()) {
        parts.push(FetchInitPreprocessor.chunkToString(result.value, decoder));
      }
      // Flush the bytes of a trailing incomplete character, if any
      parts.push(decoder.decode());

      init.body = parts.join('');
    }

    // Only enable keepalive functionality if we are not sending a body (some browsers seem to trip over this)
    return { ...init, keepalive: !init.body };
  }

  /**
   * Converts a single stream chunk into text.
   * Binary chunks are decoded as UTF-8, because stringifying a typed array directly
   * would produce a comma-separated list of byte values instead of the text it encodes.
   * @param {unknown} chunk The chunk that was read from the stream.
   * @param {TextDecoder} decoder The streaming decoder shared by all chunks of one body.
   * @returns {string} The text representation of the chunk.
   */
  private static chunkToString(chunk: unknown, decoder: TextDecoder): string {
    if (typeof chunk === 'string') {
      return chunk;
    }
    if (chunk instanceof ArrayBuffer || ArrayBuffer.isView(chunk)) {
      // Streaming mode keeps characters intact that are split across chunk boundaries
      return decoder.decode(chunk, { stream: true });
    }
    // Same result as joining an array: nullish values become empty, anything else is stringified
    return chunk === null || chunk === undefined ? '' : String(chunk);
  }
}
30 changes: 30 additions & 0 deletions packages/actor-http-fetch/lib/FetchInitPreprocessor.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/* eslint-disable import/no-nodejs-modules */
import { Agent as HttpAgent } from 'node:http';
import { Agent as HttpsAgent } from 'node:https';

/* eslint-enable import/no-nodejs-modules */
import type { IFetchInitPreprocessor } from './IFetchInitPreprocessor';

/**
* Overrides the HTTP agent to perform better in Node.js.
*/
export class FetchInitPreprocessor implements IFetchInitPreprocessor {
  private readonly agent: (url: URL) => HttpAgent;

  /**
   * Creates one HTTP and one HTTPS agent up front, so that they are shared by all handled requests.
   * @param {any} agentOptions The options that are passed to both agent constructors.
   */
  public constructor(agentOptions: any) {
    const plainAgent = new HttpAgent(agentOptions);
    const secureAgent = new HttpsAgent(agentOptions);
    this.agent = (target: URL): HttpAgent => {
      if (target.protocol === 'http:') {
        return plainAgent;
      }
      return secureAgent;
    };
  }

  /**
   * Extends a fetch init object with the agent selector and body-dependent options.
   * @param {RequestInit} init The fetch init object, which is not modified.
   * @returns A copy of the init object with `keepalive`, `agent` and, for requests with a body, `duplex`.
   */
  public async handle(init: RequestInit): Promise<RequestInit & { agent: (url: URL) => HttpAgent }> {
    // The Fetch API requires specific options to be set when sending body streams:
    // - 'keepalive' can not be true
    // - 'duplex' must be set to 'half'
    const bodyDependentOptions = init.body ? { keepalive: false, duplex: 'half' } : { keepalive: true };

    return { ...init, ...bodyDependentOptions, agent: this.agent };
  }
}
6 changes: 6 additions & 0 deletions packages/actor-http-fetch/lib/IFetchInitPreprocessor.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/**
* Can modify a fetch init object.
*/
// Contract for objects that adjust a fetch init object before it is handed to a fetch function.
export interface IFetchInitPreprocessor {
// Takes the init object prepared for a request and resolves to the init object that should be used for it.
handle: (init: RequestInit) => Promise<RequestInit>;
}
1 change: 1 addition & 0 deletions packages/actor-http-fetch/lib/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export * from './ActorHttpFetch';
50 changes: 50 additions & 0 deletions packages/actor-http-fetch/lib/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
{
"name": "@comunica/actor-http-fetch",
"version": "3.2.1",
"description": "A fetch http actor",
"lsd:module": true,
"license": "MIT",
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/comunica-association"
},
"homepage": "https://comunica.dev/",
"repository": {
"type": "git",
"url": "https://github.com/comunica/comunica.git",
"directory": "packages/actor-http-fetch"
},
"bugs": {
"url": "https://github.com/comunica/comunica/issues"
},
"keywords": [
"comunica",
"runner"
],
"sideEffects": false,
"main": "lib/index.js",
"typings": "lib/index",
"publishConfig": {
"access": "public"
},
"files": [
"components",
"lib/**/*.d.ts",
"lib/**/*.js",
"lib/**/*.js.map"
],
"scripts": {
"build": "yarn run build:ts && yarn run build:components",
"build:ts": "node \"../../node_modules/typescript/bin/tsc\"",
"build:components": "componentsjs-generator"
},
"dependencies": {
"@comunica/bus-http": "^3.2.1",
"@comunica/context-entries": "^3.2.1",
"@comunica/core": "^3.2.1",
"@comunica/mediatortype-time": "^3.2.1"
},
"browser": {
"./lib/FetchInitPreprocessor.js": "./lib/FetchInitPreprocessor-browser.js"
}
}
Loading

0 comments on commit b1e1f94

Please sign in to comment.