From 8b28f5c4c6b0b2cea0fb999ddb5435d784aa4529 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Mart=C3=AD?= Date: Thu, 27 Jan 2022 22:47:55 +0000 Subject: [PATCH] stop loading obfuscated type information from deps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If package P1 imports package P2, P1 needs to know which names from P2 weren't obfuscated. For instance, if P2 declares T2 and does "reflect.TypeOf(T2{...})", then P2 won't obfuscate the name T2, and neither should P1. This information should flow from P2 to P1, as P2 builds before P1. We do this via obfuscatedTypesPackage; P1 loads the type information of the obfuscated version of P2, and does a lookup for T2. If T2 exists, then it wasn't obfuscated. This mechanism has served us well, but it has downsides: 1) It wastes CPU; we load the type information for the entire package. 2) It's complex; for instance, we need KnownObjectFiles as an extra. 3) It makes our code harder to understand, as we load both the original and obfuscated type informaiton. Instead, we now have each package record what names were not obfuscated as part of its cachedOuput file. Much like KnownObjectFiles, the map records incrementally through the import graph, to avoid having to load cachedOutput files for indirect dependencies. We shouldn't need to worry about those maps getting large; we only skip obfuscating declared names in a few uncommon scenarios, such as the use of reflection or cgo's "//export". Since go/types is relatively allocation-heavy, and the export files contain a lot of data, we get a nice speed-up: name old time/op new time/op delta Build-16 11.5s ± 2% 11.1s ± 3% -3.77% (p=0.008 n=5+5) name old bin-B new bin-B delta Build-16 5.15M ± 0% 5.15M ± 0% ~ (all equal) name old cached-time/op new cached-time/op delta Build-16 375ms ± 3% 341ms ± 6% -8.96% (p=0.008 n=5+5) name old sys-time/op new sys-time/op delta Build-16 283ms ±17% 289ms ±13% ~ (p=0.841 n=5+5) name old user-time/op new user-time/op delta Build-16 687ms ± 6% 664ms ± 7% ~ (p=0.548 n=5+5) Fixes #456. Updates #475. --- main.go | 221 +++++++++++++++++------------------ testdata/scripts/cgo.txt | 12 +- testdata/scripts/reflect.txt | 10 +- 3 files changed, 124 insertions(+), 119 deletions(-) diff --git a/main.go b/main.go index 1899ed5e..bac374cc 100644 --- a/main.go +++ b/main.go @@ -152,14 +152,6 @@ var ( // Basic information about the package being currently compiled or linked. curPkg *listedPackage - - garbledImporter = importer.ForCompiler(fset, "gc", func(path string) (io.ReadCloser, error) { - pkgfile := cachedOutput.KnownObjectFiles[path] - if pkgfile == "" { - panic(fmt.Sprintf("missing in cachedOutput.KnownObjectFiles: %q", path)) - } - return os.Open(pkgfile) - }).(types.ImporterFrom) ) type importerWithMap func(path, dir string, mode types.ImportMode) (*types.Package, error) @@ -175,23 +167,6 @@ func (fn importerWithMap) ImportFrom(path, dir string, mode types.ImportMode) (* return fn(path, dir, mode) } -func obfuscatedTypesPackage(path string) *types.Package { - if path == curPkg.ImportPath { - panic("called obfuscatedTypesPackage on the current package?") - } - if pkg := cachedTypesPackages[path]; pkg != nil { - return pkg - } - pkg, err := garbledImporter.ImportFrom(path, parentWorkDir, 0) - if err != nil { - panic(err) - } - cachedTypesPackages[path] = pkg // cache for later use - return pkg -} - -var cachedTypesPackages = make(map[string]*types.Package) - // uniqueLineWriter sits underneath log.SetOutput to deduplicate log lines. // We log bits of useful information for debugging, // and logging the same detail twice is not going to help the user. @@ -711,13 +686,6 @@ func transformCompile(args []string) ([]string, error) { return nil, err } - if err := writeGobExclusive( - garbleExportFile(curPkg), - cachedOutput, - ); err != nil && !errors.Is(err, fs.ErrExist) { - return nil, err - } - // Literal obfuscation uses math/rand, so seed it deterministically. randSeed := flagSeed.bytes if len(randSeed) == 0 { @@ -739,17 +707,14 @@ func transformCompile(args []string) ([]string, error) { newPaths := make([]string, 0, len(files)) for i, file := range files { - name := filepath.Base(paths[i]) + filename := filepath.Base(paths[i]) + debugf("obfuscating %s", filename) if curPkg.ImportPath == "runtime" && flagTiny { // strip unneeded runtime code - stripRuntime(name, file) - } - if strings.HasPrefix(name, "_cgo_") { - // Don't obfuscate cgo code, since it's generated and it gets messy. - } else { - tf.handleDirectives(file.Comments) - file = tf.transformGo(file) + stripRuntime(filename, file) } + tf.handleDirectives(file.Comments) + file = tf.transformGo(filename, file) if newPkgPath != "" { file.Name.Name = newPkgPath } @@ -761,10 +726,10 @@ func transformCompile(args []string) ([]string, error) { // Uncomment for some quick debugging. Do not delete. // if curPkg.ToObfuscate { - // fmt.Fprintf(os.Stderr, "\n-- %s/%s --\n%s", curPkg.ImportPath, name, src) + // fmt.Fprintf(os.Stderr, "\n-- %s/%s --\n%s", curPkg.ImportPath, filename, src) // } - if path, err := writeTemp(name, src); err != nil { + if path, err := writeTemp(filename, src); err != nil { return nil, err } else { newPaths = append(newPaths, path) @@ -776,7 +741,7 @@ func transformCompile(args []string) ([]string, error) { return nil, err } - debugFilePath := filepath.Join(pkgDebugDir, name) + debugFilePath := filepath.Join(pkgDebugDir, filename) if err := os.WriteFile(debugFilePath, src, 0o666); err != nil { return nil, err } @@ -784,6 +749,13 @@ func transformCompile(args []string) ([]string, error) { } flags = flagSetValue(flags, "-importcfg", newImportCfg) + if err := writeGobExclusive( + garbleExportFile(curPkg), + cachedOutput, + ); err != nil && !errors.Is(err, fs.ErrExist) { + return nil, err + } + return append(flags, newPaths...), nil } @@ -816,7 +788,10 @@ func (tf *transformer) handleDirectives(comments []*ast.CommentGroup) { newName := fields[2] dotCnt := strings.Count(newName, ".") if dotCnt < 1 { - // probably a malformed linkname directive + // cgo-generated code uses linknames to made up symbol names, + // which do not have a package path at all. + // Replace the comment in case the local name was obfuscated. + comment.Text = strings.Join(fields, " ") continue } switch newName { @@ -833,8 +808,7 @@ func (tf *transformer) handleDirectives(comments []*ast.CommentGroup) { lpkg, err := listPackage(pkgPath) if err != nil { - // probably a made up symbol name, replace the comment - // in case the local name was obfuscated. + // Probably a made up name like above, but with a dot. comment.Text = strings.Join(fields, " ") continue } @@ -919,8 +893,7 @@ func toObfuscate(path string) bool { return module.MatchPrefixPatterns(cache.GOGARBLE, path) } -// processImportCfg parses the importcfg file passed to a compile or link step, -// adding missing entries to KnownObjectFiles to be stored in the build cache. +// processImportCfg parses the importcfg file passed to a compile or link step. // It also builds a new importcfg file to account for obfuscated import paths. func processImportCfg(flags []string) (newImportCfg string, _ error) { importCfg := flagValue(flags, "-importcfg") @@ -962,20 +935,6 @@ func processImportCfg(flags []string) (newImportCfg string, _ error) { importPath, objectPath := args[:j], args[j+1:] packagefiles = append(packagefiles, [2]string{importPath, objectPath}) - - if prev := cachedOutput.KnownObjectFiles[importPath]; prev != "" { - // Nothing to do; recorded by one of our dependencies. - } else if strings.HasSuffix(objectPath, "_pkg_.a") { - // The path is inside a temporary directory, to be deleted soon. - // Record the final location within the build cache instead. - finalObjectPath, err := gocachePathForFile(objectPath) - if err != nil { - return "", err - } - cachedOutput.KnownObjectFiles[importPath] = finalObjectPath - } else { - cachedOutput.KnownObjectFiles[importPath] = objectPath - } } } @@ -1034,27 +993,26 @@ type ( // cachedOutput contains information that will be stored as per garbleExportFile. var cachedOutput = struct { - // KnownObjectFiles is filled from -importcfg in the current obfuscated build. - // As such, it records export data for the dependencies which might be - // themselves obfuscated, depending on GOGARBLE. - // - // TODO: We rely on obfuscated type information to know what names we didn't - // obfuscate. Instead, directly record what names we chose not to obfuscate, - // which should then avoid having to go through go/types. - KnownObjectFiles map[string]string - // KnownReflectAPIs is a static record of what std APIs use reflection on their // parameters, so we can avoid obfuscating types used with them. // // TODO: we're not including fmt.Printf, as it would have many false positives, // unless we were smart enough to detect which arguments get used as %#v or %T. KnownReflectAPIs map[funcFullName][]reflectParameterPosition + + // KnownCannotObfuscate is filled with the fully qualified names from each + // package that we could not obfuscate as per cannotObfuscateNames. + // This record is necessary for knowing what names from imported packages + // weren't obfuscated, so we can obfuscate their local uses accordingly. + // + // TODO: merge cannotObfuscateNames into this directly + KnownCannotObfuscate map[string]struct{} }{ - KnownObjectFiles: map[string]string{}, KnownReflectAPIs: map[funcFullName][]reflectParameterPosition{ "reflect.TypeOf": {0}, "reflect.ValueOf": {0}, }, + KnownCannotObfuscate: map[string]struct{}{}, } // garbleExportFile returns an absolute path to a build cache entry @@ -1381,8 +1339,76 @@ func (tf *transformer) recordType(t types.Type) { } } +func recordedObjectString(obj types.Object) string { + if !obj.Exported() { + // Unexported names will never be used by other packages, + // so we don't need to bother recording them. + return "" + } + if obj, ok := obj.(*types.Var); ok && obj.IsField() { + // For exported fields, "pkgpath.Field" is not unique, + // because two exported top-level types could share "Field". + // + // Moreover, note that not all fields belong to named struct types; + // an API could be exposing: + // + // var usedInReflection = struct{Field string} + // + // For now, a hack: assume that packages don't declare the same field + // more than once in the same line. This works in practice, but one + // could craft Go code to break this assumption. + // Also note that the compiler's object files include filenames and line + // numbers, but not column numbers nor byte offsets. + // TODO(mvdan): give this another think, and add tests involving anon types. + pos := fset.Position(obj.Pos()) + return fmt.Sprintf("%s.%s - %s:%d", obj.Pkg().Path(), obj.Name(), + filepath.Base(pos.Filename), pos.Line) + } + // Names which are not at the top level cannot be imported, + // so we don't need to record them either. + // Note that this doesn't apply to fields, which are never top-level. + if obj.Pkg().Scope().Lookup(obj.Name()) != obj { + return "" + } + // For top-level exported names, "pkgpath.Name" is unique. + return fmt.Sprintf("%s.%s", obj.Pkg().Path(), obj.Name()) +} + +func recordAsNotObfuscated(obj types.Object) bool { + if obj.Pkg().Path() != curPkg.ImportPath { + panic("called recordedAsNotObfuscated with a foreign object") + } + if !obj.Exported() { + // Unexported names will never be used by other packages, + // so we don't need to bother recording them. + return true + } + + if objStr := recordedObjectString(obj); objStr != "" { + cachedOutput.KnownCannotObfuscate[objStr] = struct{}{} + } + return true // to simplify early returns in astutil.ApplyFunc +} + +func recordedAsNotObfuscated(obj types.Object) bool { + if obj.Pkg().Path() == curPkg.ImportPath { + // The current package knows what names it's not obfuscating. + return false + } + if !obj.Exported() { + // Not recorded, as per recordAsNotObfuscated. + return false + } + objStr := recordedObjectString(obj) + if objStr == "" { + return false + } + _, ok := cachedOutput.KnownCannotObfuscate[objStr] + return ok +} + // transformGo obfuscates the provided Go syntax file. -func (tf *transformer) transformGo(file *ast.File) *ast.File { +func (tf *transformer) transformGo(filename string, file *ast.File) *ast.File { // Only obfuscate the literals here if the flag is on // and if the package in question is to be obfuscated. // @@ -1398,12 +1424,10 @@ func (tf *transformer) transformGo(file *ast.File) *ast.File { if !ok { return true } + // TODO(mvdan): use "name := node.Name" if node.Name == "_" { return true // unnamed remains unnamed } - if strings.HasPrefix(node.Name, "_C") || strings.Contains(node.Name, "_cgo") { - return true // don't mess with cgo-generated code - } obj := tf.info.ObjectOf(node) if obj == nil { _, isImplicit := tf.info.Defs[node] @@ -1484,6 +1508,16 @@ func (tf *transformer) transformGo(file *ast.File) *ast.File { // We don't want to obfuscate this object name. if tf.cannotObfuscateNames[obj] { + return recordAsNotObfuscated(obj) + } + // The imported package that declared this object did not obfuscate it. + if recordedAsNotObfuscated(obj) { + return true + } + + // TODO(mvdan): investigate obfuscating these too. + filename := fset.Position(obj.Pos()).Filename + if strings.HasPrefix(filename, "_cgo_") || strings.Contains(filename, ".cgo1.") { return true } @@ -1530,43 +1564,8 @@ func (tf *transformer) transformGo(file *ast.File) *ast.File { fieldsHash := []byte(strct.String()) hashToUse = addGarbleToHash(fieldsHash) - // If the struct of this field was not obfuscated, do not obfuscate - // any of that struct's fields. - parent, ok := cursor.Parent().(*ast.SelectorExpr) - if !ok { - break - } - named := namedType(tf.info.TypeOf(parent.X)) - if named == nil { - break // TODO(mvdan): add a test - } - if name := named.Obj().Name(); strings.HasPrefix(name, "_Ctype") { - // A field accessor on a cgo type, such as a C struct. - // We're not obfuscating cgo names. - return true - } - if path != curPkg.ImportPath { - obfPkg := obfuscatedTypesPackage(path) - if obfPkg.Scope().Lookup(named.Obj().Name()) != nil { - tf.recordIgnore(named, path) - return true - } - } case *types.TypeName: debugName = "type" - // If the type was not obfuscated in the package were it was defined, - // do not obfuscate it here. - if path != curPkg.ImportPath { - named := namedType(obj.Type()) - if named == nil { - break // TODO(mvdan): add a test - } - obfPkg := obfuscatedTypesPackage(path) - if obfPkg.Scope().Lookup(obj.Name()) != nil { - tf.recordIgnore(named, path) - return true - } - } case *types.Func: sign := obj.Type().(*types.Signature) if sign.Recv() == nil { diff --git a/testdata/scripts/cgo.txt b/testdata/scripts/cgo.txt index 9144e919..58d334dd 100644 --- a/testdata/scripts/cgo.txt +++ b/testdata/scripts/cgo.txt @@ -5,7 +5,7 @@ env GOGARBLE=test/main garble build exec ./main cmp stdout main.stdout -binsubstr main$exe 'privateAdd' +! binsubstr main$exe 'PortedField' [short] stop # no need to verify this with -short @@ -13,17 +13,16 @@ env GOGARBLE=* garble build exec ./main cmp stdout main.stdout -binsubstr main$exe 'privateAdd' env GOGARBLE=test/main garble -tiny build exec ./main cmp stdout main.stdout -binsubstr main$exe 'privateAdd' go build exec ./main cmp stdout main.stdout +binsubstr main$exe 'privateAdd' -- go.mod -- module test/main @@ -46,7 +45,7 @@ static void callGoCallback() { } struct portedStruct { - char* PortedField; + char* PortedField; }; */ import "C" @@ -56,7 +55,8 @@ func main() { fmt.Println(C.privateAdd(C.int(1), C.int(2))) _, _ = user.Current() - fmt.Printf("%#v\n", C.struct_portedStruct{}) + st := C.struct_portedStruct{} + fmt.Println(st.PortedField == nil) C.callGoCallback() } @@ -67,5 +67,5 @@ func goCallback() { } -- main.stdout -- 3 -main._Ctype_struct_portedStruct{PortedField:(*main._Ctype_char)(nil)} +true go callback diff --git a/testdata/scripts/reflect.txt b/testdata/scripts/reflect.txt index 805c3f6a..8763779c 100644 --- a/testdata/scripts/reflect.txt +++ b/testdata/scripts/reflect.txt @@ -89,7 +89,9 @@ func main() { // Using type aliases as both regular fields, and embedded fields. var emb EmbeddingIndirect emb.With.IndirectUnobfuscated = "indirect-with" + emb.With.DuplicateFieldName = 3 emb.Without.IndirectObfuscated = "indirect-without" + emb.Without.DuplicateFieldName = 4 fmt.Printf("%v\n", emb) printfWithoutPackage("%#v\n", emb.With) @@ -231,10 +233,14 @@ var _ = reflect.TypeOf(IndirectNamedWithReflect{}) type IndirectNamedWithReflect struct { IndirectUnobfuscated string + + DuplicateFieldName int } type IndirectNamedWithoutReflect struct { IndirectObfuscated string + + DuplicateFieldName int } -- main.stdout -- @@ -249,7 +255,7 @@ Hello Dave. {"InnerField":3,"Anon":{"AnonField":0}} {downstream} {sibling} -{{indirect-with} {indirect-without} {}} -IndirectNamedWithReflect{IndirectUnobfuscated:"indirect-with"} +{{indirect-with 3} {indirect-without 4} { 0}} +IndirectNamedWithReflect{IndirectUnobfuscated:"indirect-with", DuplicateFieldName:3} ReflectionField {0}