From af564b912d02f9391a4158c675aef4b9b4e63933 Mon Sep 17 00:00:00 2001 From: Henrique Dias Date: Mon, 15 Apr 2024 17:30:29 +0200 Subject: [PATCH] examples: verified file fetch --- examples/go.mod | 6 +- examples/verified-fetch/README.md | 27 +++ examples/verified-fetch/fetcher.go | 243 +++++++++++++++++++++++++++ examples/verified-fetch/hello.car | Bin 0 -> 107 bytes examples/verified-fetch/main.go | 53 ++++++ examples/verified-fetch/main_test.go | 39 +++++ 6 files changed, 365 insertions(+), 3 deletions(-) create mode 100644 examples/verified-fetch/README.md create mode 100644 examples/verified-fetch/fetcher.go create mode 100644 examples/verified-fetch/hello.car create mode 100644 examples/verified-fetch/main.go create mode 100644 examples/verified-fetch/main_test.go diff --git a/examples/go.mod b/examples/go.mod index c8ad299e0b..eb85831db2 100644 --- a/examples/go.mod +++ b/examples/go.mod @@ -3,11 +3,13 @@ module github.com/ipfs/boxo/examples go 1.21 require ( - github.com/ipfs/boxo v0.13.1 + github.com/ipfs/boxo v0.18.0 github.com/ipfs/go-block-format v0.2.0 github.com/ipfs/go-cid v0.4.1 github.com/ipfs/go-datastore v0.6.0 + github.com/ipfs/go-unixfsnode v1.9.0 github.com/ipld/go-car/v2 v2.13.1 + github.com/ipld/go-codec-dagpb v1.6.0 github.com/ipld/go-ipld-prime v0.21.0 github.com/libp2p/go-libp2p v0.33.2 github.com/libp2p/go-libp2p-routing-helpers v0.7.3 @@ -75,10 +77,8 @@ require ( github.com/ipfs/go-merkledag v0.11.0 // indirect github.com/ipfs/go-metrics-interface v0.0.1 // indirect github.com/ipfs/go-peertaskqueue v0.8.1 // indirect - github.com/ipfs/go-unixfsnode v1.9.0 // indirect github.com/ipfs/go-verifcid v0.0.3 // indirect github.com/ipld/go-car v0.6.2 // indirect - github.com/ipld/go-codec-dagpb v1.6.0 // indirect github.com/jackpal/go-nat-pmp v1.0.2 // indirect github.com/jbenet/go-temp-err-catcher v0.1.0 // indirect github.com/jbenet/goprocess v0.1.4 // indirect diff --git a/examples/verified-fetch/README.md b/examples/verified-fetch/README.md 
new file mode 100644
index 0000000000..9c9b5266ca
--- /dev/null
+++ b/examples/verified-fetch/README.md
@@ -0,0 +1,27 @@
+# Verified File Fetch
+
+This example shows how to download a UnixFS file from a gateway that implements
+the [Trustless Gateway](https://specs.ipfs.tech/http-gateways/trustless-gateway/)
+specification, in a trustless, verifiable manner.
+
+This example does not yet support downloading UnixFS directories, since that is
+more complex. For now, we suggest reading the [`extract.go`](https://github.com/ipld/go-car/blob/master/cmd/car/extract.go)
+file from `go-car` in order to understand how to convert a directory into a file system.
+
+## Build
+
+```bash
+> go build -o verified-fetch
+```
+
+## Usage
+
+First, you need a gateway that complies with the Trustless Gateway specification.
+Specifically, the gateway must support the CAR response format and, when fetching
+from an `/ipns` path, verifiable IPNS records.
+
+As an example, you can verifiably fetch a `hello.txt` file:
+
+```
+./verified-fetch -o hello.txt /ipfs/bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e
+```
diff --git a/examples/verified-fetch/fetcher.go b/examples/verified-fetch/fetcher.go
new file mode 100644
index 0000000000..afc8910d98
--- /dev/null
+++ b/examples/verified-fetch/fetcher.go
@@ -0,0 +1,243 @@
+package main
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"strings"
+
+	"github.com/ipfs/boxo/blockservice"
+	"github.com/ipfs/boxo/blockstore"
+	"github.com/ipfs/boxo/exchange/offline"
+	bsfetcher "github.com/ipfs/boxo/fetcher/impl/blockservice"
+	files "github.com/ipfs/boxo/files"
+	"github.com/ipfs/boxo/gateway"
+	"github.com/ipfs/boxo/ipld/merkledag"
+	unixfile "github.com/ipfs/boxo/ipld/unixfs/file"
+	"github.com/ipfs/boxo/namesys"
+	"github.com/ipfs/boxo/path"
+	"github.com/ipfs/boxo/path/resolver"
+	"github.com/ipfs/go-datastore"
+	dssync "github.com/ipfs/go-datastore/sync"
+
"github.com/ipfs/go-unixfsnode"
+	gocarv2 "github.com/ipld/go-car/v2"
+	dagpb "github.com/ipld/go-codec-dagpb"
+)
+
+// fetcher downloads UnixFS files from an HTTP gateway as CAR archives.
+type fetcher struct {
+	gateway   string             // gateway base URL, stored without trailing slashes
+	limit     int64              // cap in bytes on the response body read; applied only when > 0
+	userAgent string             // User-Agent request header; omitted when empty
+	ns        namesys.NameSystem // resolves the requested path before it is fetched
+}
+
+// fetcherOption configures a fetcher; newFetcher aborts on the first option error.
+type fetcherOption func(f *fetcher) error
+
+// withUserAgent sets the User-Agent header sent with gateway requests; empty omits it.
+func withUserAgent(userAgent string) fetcherOption {
+	return func(f *fetcher) error {
+		f.userAgent = userAgent
+		return nil
+	}
+}
+
+// withLimit caps the bytes read from a gateway response; values <= 0 mean no cap.
+func withLimit(limit int64) fetcherOption {
+	return func(f *fetcher) error {
+		f.limit = limit
+		return nil
+	}
+}
+
+// newFetcher builds a fetcher for gatewayURL (required) and applies the given options.
+func newFetcher(gatewayURL string, options ...fetcherOption) (*fetcher, error) {
+	if gatewayURL == "" {
+		return nil, errors.New("a gateway must be set")
+	}
+
+	vs, err := gateway.NewRemoteValueStore([]string{gatewayURL}, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	ns, err := namesys.NewNameSystem(vs)
+	if err != nil {
+		return nil, err
+	}
+
+	f := &fetcher{
+		gateway: strings.TrimRight(gatewayURL, "/"),
+		ns:      ns,
+	}
+
+	for _, option := range options {
+		if err := option(f); err != nil {
+			return nil, err
+		}
+	}
+
+	return f, nil
+}
+
+// fetch resolves p, downloads it from the gateway as a CAR and writes the file to output.
+func (f *fetcher) fetch(ctx context.Context, p path.Path, output string) error {
+	imPath, err := f.resolvePath(ctx, p)
+	if err != nil {
+		return fmt.Errorf("path could not be resolved: %w", err)
+	}
+
+	// The HTTP body is handed to carToFileStream, which closes it.
+	rc, err := f.httpRequest(ctx, imPath, "application/vnd.ipld.car")
+	if err != nil {
+		return fmt.Errorf("failed to fetch CAR: %w", err)
+	}
+
+	rc, err = carToFileStream(ctx, rc, imPath)
+	if err != nil {
+		return fmt.Errorf("failed to read car stream: %w", err)
+	}
+	defer rc.Close()
+
+	// The output file is only created once the CAR has been read successfully.
+	fd, err := os.OpenFile(output, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0666)
+	if err != nil {
+		return fmt.Errorf("failed to open output file: %w", err)
+	}
+
+	if _, err := io.Copy(fd, rc); err != nil {
+		fd.Close() // best effort; the copy error is the one worth reporting
+		return fmt.Errorf("failed to write output file: %w", err)
+	}
+
+	// Close explicitly: a failed close can mean the data never reached disk.
+	if err := fd.Close(); err != nil {
+		return fmt.Errorf("failed to close output file: %w", err)
+	}
+	return nil
+}
+
+// Close releases the fetcher's resources. It currently holds none, so it
+// always returns nil.
+func (f *fetcher) Close() error {
+	return nil
+}
+
+// resolvePath resolves p through the fetcher's name system and converts the
+// result into an immutable path.
+func (f *fetcher) resolvePath(ctx context.Context, p path.Path) (path.ImmutablePath, error) {
+	res, err := f.ns.Resolve(ctx, p)
+	if err != nil {
+		return path.ImmutablePath{}, err
+	}
+
+	imPath, err := path.NewImmutablePath(res.Path)
+	if err != nil {
+		return path.ImmutablePath{}, fmt.Errorf("could not resolve to immutable path: %w", err)
+	}
+
+	return imPath, nil
+}
+
+// httpRequest performs a GET for p against the gateway, sending accept as the
+// Accept header and the configured User-Agent, if any. A response status of
+// 400 or above is returned as an error that includes the response body. On
+// success the caller owns the returned body and must close it; when a limit is
+// configured, reads from it stop after that many bytes.
+func (f *fetcher) httpRequest(ctx context.Context, p path.Path, accept string) (io.ReadCloser, error) {
+	url := f.gateway + p.String()
+	fmt.Printf("Fetching with HTTP: %q\n", url)
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
+	if err != nil {
+		return nil, fmt.Errorf("http.NewRequest error: %w", err)
+	}
+	req.Header.Set("Accept", accept)
+
+	if f.userAgent != "" {
+		req.Header.Set("User-Agent", f.userAgent)
+	}
+
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("http.DefaultClient.Do error: %w", err)
+	}
+
+	if resp.StatusCode >= 400 {
+		defer resp.Body.Close()
+		mes, err := io.ReadAll(resp.Body)
+		if err != nil {
+			return nil, fmt.Errorf("error reading error body: %w", err)
+		}
+		return nil, fmt.Errorf("GET %s error: %s: %s", url, resp.Status, string(mes))
+	}
+
+	if f.limit > 0 {
+		return newLimitReadCloser(resp.Body, f.limit), nil
+	}
+	return resp.Body, nil
+}
+
+// carToFileStream reads every block of the CAR stream r into a temporary
+// map-backed blockstore, resolves imPath against those blocks and returns the
+// UnixFS file it points to. It returns an error if the node is not a regular
+// file. r is always closed before returning.
+func carToFileStream(ctx context.Context, r io.ReadCloser, imPath path.ImmutablePath) (io.ReadCloser, error) {
+	defer r.Close()
+
+	// Create temporary block datastore and dag service.
+	dataStore := dssync.MutexWrap(datastore.NewMapDatastore())
+	blockStore := blockstore.NewBlockstore(dataStore)
+	blockService := blockservice.New(blockStore, offline.Exchange(blockStore))
+	dagService := merkledag.NewDAGService(blockService)
+
+	// NOTE(review): the returned file is read by the caller after these
+	// deferred Close calls have run - confirm that stays safe for these stores.
+	defer dagService.Blocks.Close()
+	defer dataStore.Close()
+
+	// Create CAR reader
+	car, err := gocarv2.NewBlockReader(r)
+	if err != nil {
+		return nil, fmt.Errorf("error creating car reader: %w", err)
+	}
+
+	// Add all blocks to the blockstore.
+	for {
+		block, err := car.Next()
+		if err != nil && !errors.Is(err, io.EOF) {
+			return nil, fmt.Errorf("error reading block from car: %w", err)
+		}
+		if block == nil {
+			// End of the CAR stream.
+			break
+		}
+
+		err = blockStore.Put(ctx, block)
+		if err != nil {
+			return nil, fmt.Errorf("error putting block in blockstore: %w", err)
+		}
+	}
+
+	fetcherCfg := bsfetcher.NewFetcherConfig(blockService)
+	fetcherCfg.PrototypeChooser = dagpb.AddSupportToChooser(bsfetcher.DefaultPrototypeChooser)
+	fetcher := fetcherCfg.WithReifier(unixfsnode.Reify)
+	resolver := resolver.NewBasicResolver(fetcher)
+
+	cid, _, err := resolver.ResolveToLastNode(ctx, imPath)
+	if err != nil {
+		return nil, fmt.Errorf("failed to resolve: %w", err)
+	}
+
+	nd, err := dagService.Get(ctx, cid)
+	if err != nil {
+		return nil, fmt.Errorf("failed to get resolved node: %w", err)
+	}
+
+	// Make UnixFS file out of the node.
+	uf, err := unixfile.NewUnixfsFile(ctx, dagService, nd)
+	if err != nil {
+		return nil, fmt.Errorf("error building unixfs file: %w", err)
+	}
+
+	// Check if it's a file and return.
+ if f, ok := uf.(files.File); ok { + return f, nil + } + + return nil, errors.New("unexpected unixfs node type") +} + +type limitReadCloser struct { + io.Reader + io.Closer +} + +// newLimitReadCloser returns a new [io.ReadCloser] with the reader wrapped in a +// [io.LimitedReader], limiting the reading to the specified amount. +func newLimitReadCloser(rc io.ReadCloser, limit int64) io.ReadCloser { + return limitReadCloser{ + Reader: io.LimitReader(rc, limit), + Closer: rc, + } +} diff --git a/examples/verified-fetch/hello.car b/examples/verified-fetch/hello.car new file mode 100644 index 0000000000000000000000000000000000000000..b284068a8a5235053e38d5181d51033bef6a21a6 GIT binary patch literal 107 zcmcColv") + flag.PrintDefaults() + } + + gatewayUrlPtr := flag.String("g", "https://trustless-gateway.link", "trustless gateway to download the CAR file from") + userAgentPtr := flag.String("u", "", "user agent to use during the HTTP requests") + outputPtr := flag.String("o", "out", "output path to store the fetched path") + limitPtr := flag.Int64("l", 0, "file size limit for the gateway download") + flag.Parse() + + ipfsPath := flag.Arg(0) + if len(ipfsPath) == 0 { + flag.Usage() + os.Exit(1) + } + + if err := run(*gatewayUrlPtr, ipfsPath, *outputPtr, *userAgentPtr, *limitPtr); err != nil { + log.Fatal(err) + } +} + +func run(gatewayURL, ipfsPath, output, userAgent string, limit int64) error { + p, err := path.NewPath(ipfsPath) + if err != nil { + return err + } + + options := []fetcherOption{ + withUserAgent(userAgent), + withLimit(limit), + } + + f, err := newFetcher(gatewayURL, options...) 
+	if err != nil {
+		return err
+	}
+
+	return f.fetch(context.Background(), p, output)
+}
diff --git a/examples/verified-fetch/main_test.go b/examples/verified-fetch/main_test.go
new file mode 100644
index 0000000000..c7fab800bf
--- /dev/null
+++ b/examples/verified-fetch/main_test.go
@@ -0,0 +1,39 @@
+package main
+
+import (
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+// HelloWorldCID is the CID the tests request; hello.car is expected to contain it.
+const HelloWorldCID = "bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e"
+
+// TestErrorOnInvalidContent checks that run fails when the gateway returns a non-CAR body.
+func TestErrorOnInvalidContent(t *testing.T) {
+	rs := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Write([]byte("wrong data"))
+	}))
+	t.Cleanup(rs.Close)
+
+	// Use a temp dir so nothing can ever be written into the source tree.
+	err := run(rs.URL, "/ipfs/"+HelloWorldCID, filepath.Join(t.TempDir(), "hello.txt"), "", 0)
+	require.Error(t, err)
+}
+
+// TestSuccessOnValidContent checks that run succeeds when the gateway serves hello.car.
+func TestSuccessOnValidContent(t *testing.T) {
+	data, err := os.ReadFile("./hello.car")
+	require.NoError(t, err)
+
+	rs := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Write(data)
+	}))
+	t.Cleanup(rs.Close)
+
+	err = run(rs.URL, "/ipfs/"+HelloWorldCID, filepath.Join(t.TempDir(), "hello.txt"), "", 0)
+	require.NoError(t, err)
+}