diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index cc8a5f4..02fb7c8 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - go: ["1.21", "1.20"] + go: ["1.21"] steps: - uses: actions/checkout@v2 diff --git a/README.md b/README.md index 4d39ceb..b702bc0 100644 --- a/README.md +++ b/README.md @@ -69,10 +69,6 @@ The `docd` tool runs as either: ### Optional flags - `addr` - the bind address for the HTTP server, default is ":8888" -- `log-level` - - 0: errors & critical info - - 1: inclues 0 and logs each request as well - - 2: include 1 and logs the response payloads - `readability-length-low` - sets the readability length low if the ?readability=1 parameter is set - `readability-length-high` - sets the readability length high if the ?readability=1 parameter is set - `readability-stopwords-low` - sets the readability stopwords low if the ?readability=1 parameter is set @@ -83,11 +79,8 @@ The `docd` tool runs as either: ### How to start the service - $ # This will only log errors and critical info - $ docd -log-level 0 - - $ # This will run on port 8000 and log each request - $ docd -addr :8000 -log-level 1 + $ # This runs on port 8000 + $ docd -addr :8000 ## Example usage (code) @@ -104,7 +97,6 @@ package main import ( "fmt" - "log" "code.sajari.com/docconv" ) @@ -112,7 +104,7 @@ import ( func main() { res, err := docconv.ConvertPath("your-file.pdf") if err != nil { - log.Fatal(err) + // TODO: handle } fmt.Println(res) } @@ -125,7 +117,6 @@ package main import ( "fmt" - "log" "code.sajari.com/docconv/client" ) @@ -136,7 +127,7 @@ func main() { res, err := client.ConvertPath(c, "your-file.pdf") if err != nil { - log.Fatal(err) + // TODO: handle } fmt.Println(res) } diff --git a/docd/appengine/Dockerfile b/docd/appengine/Dockerfile index 369fb11..ad342a2 100644 --- a/docd/appengine/Dockerfile +++ b/docd/appengine/Dockerfile @@ -2,4 +2,4 @@ # of its runtime dependencies. # https://cloud.google.com/appengine/docs/flexible/custom-runtimes/about-custom-runtimes FROM sajari/docd:1.3.7 -CMD ["-addr", ":8080", "-error-reporting"] +CMD ["-addr", ":8080", "-json-cloud-logging", "-error-reporting"] diff --git a/docd/convert.go b/docd/convert.go index 2d3a93f..c01a42f 100644 --- a/docd/convert.go +++ b/docd/convert.go @@ -6,7 +6,7 @@ import ( "encoding/json" "fmt" "io" - "log" + "log/slog" "net/http" "os" @@ -17,6 +17,7 @@ import ( ) type convertServer struct { + l *slog.Logger er internal.ErrorReporter } @@ -27,9 +28,7 @@ func (s *convertServer) convert(w http.ResponseWriter, r *http.Request) { var readability bool if r.FormValue("readability") == "1" { readability = true - if *logLevel >= 2 { - log.Println("Readability is on") - } + s.l.DebugContext(ctx, "Readability is on") } path := r.FormValue("path") @@ -73,9 +72,7 @@ func (s *convertServer) convert(w http.ResponseWriter, r *http.Request) { mimeType = docconv.MimeTypeByExtension(info.Filename) } - if *logLevel >= 1 { - log.Printf("Received file: %v (%v)", info.Filename, mimeType) - } + s.l.InfoContext(ctx, "Received file", "filename", info.Filename, "mimeType", mimeType) data, err := docconv.Convert(file, mimeType, readability) if err != nil { @@ -91,7 +88,7 @@ func (s *convertServer) clientError(ctx context.Context, w http.ResponseWriter, Error: fmt.Sprintf(pattern, args...), }) - log.Printf(pattern, args...) + s.l.InfoContext(ctx, fmt.Sprintf(pattern, args...)) } func (s *convertServer) serverError(ctx context.Context, w http.ResponseWriter, r *http.Request, err error) { @@ -104,7 +101,7 @@ func (s *convertServer) serverError(ctx context.Context, w http.ResponseWriter, } s.er.Report(e) - log.Printf("%v", err) + s.l.ErrorContext(ctx, err.Error(), "error", err) } func (s *convertServer) respond(ctx context.Context, w http.ResponseWriter, r *http.Request, code int, resp interface{}) { diff --git a/docd/internal/cloudtrace/context.go b/docd/internal/cloudtrace/context.go new file mode 100644 index 0000000..97b0b22 --- /dev/null +++ b/docd/internal/cloudtrace/context.go @@ -0,0 +1,25 @@ +package cloudtrace + +import ( + "context" +) + +type traceKey struct{} +type spanKey struct{} + +func contextWithTraceInfo(ctx context.Context, traceHeader string) context.Context { + traceID, spanID := parseHeader(traceHeader) + if traceID != "" { + ctx = context.WithValue(ctx, traceKey{}, traceID) + } + if spanID != "" { + ctx = context.WithValue(ctx, spanKey{}, spanID) + } + return ctx +} + +func traceInfoFromContext(ctx context.Context) (traceID, spanID string) { + traceID, _ = ctx.Value(traceKey{}).(string) + spanID, _ = ctx.Value(spanKey{}).(string) + return +} diff --git a/docd/internal/cloudtrace/header.go b/docd/internal/cloudtrace/header.go new file mode 100644 index 0000000..c441baa --- /dev/null +++ b/docd/internal/cloudtrace/header.go @@ -0,0 +1,16 @@ +package cloudtrace + +import "strings" + +// The header specification is: +// "X-Cloud-Trace-Context: TRACE_ID/SPAN_ID;o=TRACE_TRUE" +const CloudTraceContextHeader = "X-Cloud-Trace-Context" + +func parseHeader(value string) (traceID, spanID string) { + var found bool + traceID, after, found := strings.Cut(value, "/") + if found { + spanID, _, _ = strings.Cut(after, ";") + } + return +} diff --git a/docd/internal/cloudtrace/http_handler.go b/docd/internal/cloudtrace/http_handler.go new file mode 100644 index 0000000..1a7071c --- /dev/null +++ b/docd/internal/cloudtrace/http_handler.go @@ -0,0 +1,14 @@ +package cloudtrace + +import "net/http" + +type HTTPHandler struct { + // Handler to wrap. + Handler http.Handler +} + +func (h *HTTPHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + ctx := contextWithTraceInfo(r.Context(), r.Header.Get(CloudTraceContextHeader)) + + h.Handler.ServeHTTP(w, r.WithContext(ctx)) +} diff --git a/docd/internal/cloudtrace/logger.go b/docd/internal/cloudtrace/logger.go new file mode 100644 index 0000000..5bd8016 --- /dev/null +++ b/docd/internal/cloudtrace/logger.go @@ -0,0 +1,72 @@ +package cloudtrace + +// Inspired by https://github.com/remko/cloudrun-slog + +import ( + "context" + "fmt" + "log/slog" + "os" +) + +// Extra log level supported by Cloud Logging +const LevelCritical = slog.Level(12) + +// Handler that outputs JSON understood by the structured log agent. +// See https://cloud.google.com/logging/docs/agent/logging/configuration#special-fields +type CloudLoggingHandler struct { + handler slog.Handler + projectID string +} + +var _ slog.Handler = (*CloudLoggingHandler)(nil) + +func NewCloudLoggingHandler(projectID string, level slog.Level) *CloudLoggingHandler { + return &CloudLoggingHandler{ + projectID: projectID, + handler: slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{ + AddSource: true, + Level: level, + ReplaceAttr: func(groups []string, a slog.Attr) slog.Attr { + if a.Key == slog.MessageKey { + a.Key = "message" + } else if a.Key == slog.SourceKey { + a.Key = "logging.googleapis.com/sourceLocation" + } else if a.Key == slog.LevelKey { + a.Key = "severity" + level := a.Value.Any().(slog.Level) + if level == LevelCritical { + a.Value = slog.StringValue("CRITICAL") + } + } + return a + }, + }), + } +} + +func (h *CloudLoggingHandler) Enabled(ctx context.Context, level slog.Level) bool { + return h.handler.Enabled(ctx, level) +} + +func (h *CloudLoggingHandler) Handle(ctx context.Context, rec slog.Record) error { + traceID, spanID := traceInfoFromContext(ctx) + if traceID != "" { + rec = rec.Clone() + // https://cloud.google.com/logging/docs/agent/logging/configuration#special-fields + trace := fmt.Sprintf("projects/%s/traces/%s", h.projectID, traceID) + rec.Add("logging.googleapis.com/trace", slog.StringValue(trace)) + if spanID != "" { + rec.Add("logging.googleapis.com/spanId", slog.StringValue(spanID)) + } + } + return h.handler.Handle(ctx, rec) +} + +func (h *CloudLoggingHandler) WithAttrs(attrs []slog.Attr) slog.Handler { + return &CloudLoggingHandler{handler: h.handler.WithAttrs(attrs)} +} + +func (h *CloudLoggingHandler) WithGroup(name string) slog.Handler { + return &CloudLoggingHandler{handler: h.handler.WithGroup(name)} +} diff --git a/docd/internal/debug/context.go b/docd/internal/debug/context.go new file mode 100644 index 0000000..f2896fd --- /dev/null +++ b/docd/internal/debug/context.go @@ -0,0 +1,15 @@ +package debug + +import ( + "context" +) + +type debugEnabledKey struct{} + +func debugEnabledInContext(ctx context.Context) bool { + enabled, ok := ctx.Value(debugEnabledKey{}).(bool) + if !ok { + return false + } + return enabled +} diff --git a/docd/internal/debug/http_handler.go b/docd/internal/debug/http_handler.go new file mode 100644 index 0000000..328832d --- /dev/null +++ b/docd/internal/debug/http_handler.go @@ -0,0 +1,22 @@ +package debug + +import ( + "context" + "net/http" + "strconv" +) + +type HTTPHandler struct { + // Handler to wrap. + Handler http.Handler +} + +func (h *HTTPHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + + if ok, _ := strconv.ParseBool(r.Header.Get(DebugHeader)); ok { + ctx = context.WithValue(ctx, debugEnabledKey{}, true) + } + + h.Handler.ServeHTTP(w, r.WithContext(ctx)) +} diff --git a/docd/internal/debug/logger.go b/docd/internal/debug/logger.go new file mode 100644 index 0000000..c651e23 --- /dev/null +++ b/docd/internal/debug/logger.go @@ -0,0 +1,33 @@ +package debug + +import ( + "context" + "log/slog" +) + +const DebugHeader = "X-Debug" + +type debugHandler struct { + slog.Handler +} + +func NewDebugHandler(h slog.Handler) *debugHandler { + return &debugHandler{Handler: h} +} + +var _ slog.Handler = (*debugHandler)(nil) + +func (h *debugHandler) Enabled(ctx context.Context, level slog.Level) bool { + if debugEnabledInContext(ctx) { + return true + } + return h.Handler.Enabled(ctx, level) +} + +func (h *debugHandler) WithAttrs(attrs []slog.Attr) slog.Handler { + return &debugHandler{Handler: h.Handler.WithAttrs(attrs)} +} + +func (h *debugHandler) WithGroup(name string) slog.Handler { + return &debugHandler{Handler: h.Handler.WithGroup(name)} +} diff --git a/docd/internal/recovery.go b/docd/internal/recovery.go index b6f5f27..2bf86d0 100644 --- a/docd/internal/recovery.go +++ b/docd/internal/recovery.go @@ -3,7 +3,7 @@ package internal import ( "errors" "fmt" - "log" + "log/slog" "net/http" "runtime/debug" @@ -11,15 +11,16 @@ import ( ) type recoveryHandler struct { + l *slog.Logger er ErrorReporter handler http.Handler } // RecoveryHandler is HTTP middleware that recovers from a panic, writes a // 500, reports the panic, logs the panic and continues to the next handler. -func RecoveryHandler(er ErrorReporter) func(h http.Handler) http.Handler { +func RecoveryHandler(l *slog.Logger, er ErrorReporter) func(h http.Handler) http.Handler { return func(h http.Handler) http.Handler { - return &recoveryHandler{er: er, handler: h} + return &recoveryHandler{l: l, er: er, handler: h} } } @@ -45,8 +46,7 @@ func (h recoveryHandler) handle(r *http.Request, err error) { } h.er.Report(e) - log.Println(err) - log.Printf("%s", stack) + h.l.ErrorContext(r.Context(), err.Error(), "error", err, "stack", string(stack)) } // recovered represents the return value from a call to recover. diff --git a/docd/main.go b/docd/main.go index 2503f52..48c9328 100644 --- a/docd/main.go +++ b/docd/main.go @@ -4,17 +4,20 @@ import ( "context" "flag" "fmt" - "log" + "log/slog" "net/http" "os" "runtime" + "cloud.google.com/go/compute/metadata" "cloud.google.com/go/errorreporting" "github.com/gorilla/mux" "code.sajari.com/docconv" "code.sajari.com/docconv/docd/internal" + "code.sajari.com/docconv/docd/internal/cloudtrace" + "code.sajari.com/docconv/docd/internal/debug" ) var ( @@ -22,12 +25,14 @@ var ( inputPath = flag.String("input", "", "The file path to convert and exit; no server") + jsonCloudLogging = flag.Bool("json-cloud-logging", false, "Whether or not to enable JSON Cloud Logging") + + cloudTraceGCPProjectID = flag.String("cloud-trace-gcp-project-id", "", "The GCP project to use for Cloud Trace") + errorReporting = flag.Bool("error-reporting", false, "Whether or not to enable GCP Error Reporting") errorReportingGCPProjectID = flag.String("error-reporting-gcp-project-id", "", "The GCP project to use for Error Reporting") errorReportingAppEngineService = flag.String("error-reporting-app-engine-service", "", "The App Engine service to use for Error Reporting") - logLevel = flag.Uint("log-level", 0, "The verbosity of the log") - readabilityLengthLow = flag.Int("readability-length-low", 70, "Sets the readability length low") readabilityLengthHigh = flag.Int("readability-length-high", 200, "Sets the readability length high") readabilityStopwordsLow = flag.Float64("readability-stopwords-low", 0.2, "Sets the readability stopwords low") @@ -40,6 +45,28 @@ var ( func main() { flag.Parse() + l := slog.New(debug.NewDebugHandler(slog.Default().Handler())) + + if *jsonCloudLogging { + gcpProjectID := *cloudTraceGCPProjectID + if gcpProjectID == "" { + gcpProjectID = os.Getenv("GOOGLE_CLOUD_PROJECT") + } + if gcpProjectID == "" { + l.Debug("GOOGLE_CLOUD_PROJECT env var not provided, looking up internal metadata service") + var err error + gcpProjectID, err = metadata.ProjectID() + if err != nil { + l.Error("Could not autodetect GCP project ID", "error", err) + os.Exit(1) + } + } + + l = slog.New(debug.NewDebugHandler(cloudtrace.NewCloudLoggingHandler(gcpProjectID, slog.LevelInfo))) + slog.SetDefault(l) + l.Info("Cloud Trace GCP project ID", "projectID", gcpProjectID) + } + var er internal.ErrorReporter = &internal.NopErrorReporter{} if *errorReporting { if *errorReportingGCPProjectID == "" { @@ -52,15 +79,17 @@ func main() { er, err = errorreporting.NewClient(context.Background(), *errorReportingGCPProjectID, errorreporting.Config{ ServiceName: *errorReportingAppEngineService, OnError: func(err error) { - log.Printf("Could not report error to Error Reporting service: %v", err) + l.Error("Could not report error to Error Reporting service", "error", err) }, }) if err != nil { - log.Fatalf("Could not create Error Reporting client: %v", err) + l.Error("Could not create Error Reporting client", "error", err) + os.Exit(1) } } cs := &convertServer{ + l: l, er: er, } @@ -78,23 +107,29 @@ func main() { if *inputPath != "" { resp, err := docconv.ConvertPath(*inputPath) if err != nil { - log.Printf("error converting file '%v': %v", *inputPath, err) + l.Error("Could not convert file", "error", err, "path", *inputPath) + os.Exit(1) } fmt.Print(string(resp.Body)) return } - serve(er, cs) + serve(l, er, cs) } // Start the conversion web service -func serve(er internal.ErrorReporter, cs *convertServer) { +func serve(l *slog.Logger, er internal.ErrorReporter, cs *convertServer) { r := mux.NewRouter() r.HandleFunc("/convert", cs.convert) - // Start webserver - log.Println("Go version", runtime.Version()) - log.Println("Setting log level to", *logLevel) - log.Println("Starting docconv on", *listenAddr) - log.Fatal(http.ListenAndServe(*listenAddr, internal.RecoveryHandler(er)(r))) + h := internal.RecoveryHandler(l, er)(r) + h = &debug.HTTPHandler{Handler: h} + h = &cloudtrace.HTTPHandler{Handler: h} + + l.Info("Go version " + runtime.Version()) + + l.Info(fmt.Sprintf("HTTP server listening on %q...", *listenAddr)) + if err := http.ListenAndServe(*listenAddr, h); err != nil { + l.Error("HTTP server ListenAndServe", "error", err) + } } diff --git a/go.mod b/go.mod index c7c52f8..ad1179e 100644 --- a/go.mod +++ b/go.mod @@ -1,8 +1,9 @@ module code.sajari.com/docconv -go 1.20 +go 1.21 require ( + cloud.google.com/go/compute v0.1.0 cloud.google.com/go/errorreporting v0.2.0 github.com/JalfResi/justext v0.0.0-20170829062021-c0282dea7198 github.com/advancedlogic/GoOse v0.0.0-20191112112754-e742535969c1 @@ -17,7 +18,6 @@ require ( require ( cloud.google.com/go v0.100.2 // indirect - cloud.google.com/go/compute v0.1.0 // indirect github.com/PuerkitoBio/goquery v1.5.1 // indirect github.com/andybalholm/cascadia v1.2.0 // indirect github.com/araddon/dateparse v0.0.0-20200409225146-d820a6159ab1 // indirect