diff options
| author | Jonathan Amsterdam <jba@google.com> | 2024-01-10 15:44:13 -0500 |
|---|---|---|
| committer | Jonathan Amsterdam <jba@google.com> | 2024-02-23 17:34:38 +0000 |
| commit | 9b2cbf616e06f212853911e9eaf650342bd0e13f (patch) | |
| tree | 7a2812456ba9854c2d7528d7a21bdbfa709ac35e /devtools/cmd/dumpdoc/dumpdoc.go | |
| parent | 094b90f1e862df40cb520acaec64948fb544e37c (diff) | |
| download | go-x-pkgsite-9b2cbf616e06f212853911e9eaf650342bd0e13f.tar.xz | |
devtools/cmd/dumpdoc: tool for writing out package documentation
This is a tool for an experiment.
It reads package documentation from the database and writes it
to a file in gob format.
Change-Id: I3b6cfde24ce155a647c3c43caff68be63832f80f
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/566416
kokoro-CI: kokoro <noreply+kokoro@google.com>
Reviewed-by: Tatiana Bradley <tatianabradley@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'devtools/cmd/dumpdoc/dumpdoc.go')
| -rw-r--r-- | devtools/cmd/dumpdoc/dumpdoc.go | 180 |
1 file changed, 180 insertions, 0 deletions
diff --git a/devtools/cmd/dumpdoc/dumpdoc.go b/devtools/cmd/dumpdoc/dumpdoc.go new file mode 100644 index 00000000..4998b5fc --- /dev/null +++ b/devtools/cmd/dumpdoc/dumpdoc.go @@ -0,0 +1,180 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// The dumpdoc command writes documentation and readmes for packages +// in search_documents to a gob file. +package main + +import ( + "context" + "database/sql" + "encoding/gob" + "flag" + "fmt" + "io" + "os" + "strings" + + _ "github.com/jackc/pgx/v4/stdlib" // for pgx driver + "golang.org/x/pkgsite/internal/config/serverconfig" + "golang.org/x/pkgsite/internal/database" + "golang.org/x/pkgsite/internal/godoc" + "golang.org/x/pkgsite/internal/log" +) + +var ( + truncate = flag.Int("t", 0, "(only for read) truncate long strings to the given length") + minImporters = flag.Int("i", 1, "(only for write) include only packages with at least this many importers") +) + +func main() { + ctx := context.Background() + flag.Usage = func() { + out := flag.CommandLine.Output() + fmt.Fprintf(out, "usage:\n") + fmt.Fprintf(out, " %s [flags] write FILE\n", os.Args[0]) + fmt.Fprintf(out, " %s [flags] read FILE\n", os.Args[0]) + flag.PrintDefaults() + } + + flag.Parse() + + if flag.Arg(1) == "" { + flag.Usage() + os.Exit(1) + } + if err := run(ctx, flag.Arg(0), flag.Arg(1)); err != nil { + log.Fatal(ctx, err) + } +} + +func run(ctx context.Context, cmd, filename string) error { + cfg, err := serverconfig.Init(ctx) + if err != nil { + return err + } + switch cmd { + case "write": + db, err := database.Open("pgx", cfg.DBConnInfo(), "dumpdoc") + if err != nil { + return err + } + defer db.Close() + return write(ctx, db, filename) + case "read": + return read(filename) + default: + return fmt.Errorf("unknown command %q", cmd) + } +} + +type PackageDoc struct { + ImportPath string + ModulePath string + Version string + NumImporters 
int + PackageDoc string + ReadmeFilename *string + ReadmeContents *string +} + +func write(ctx context.Context, db *database.DB, filename string) error { + query := fmt.Sprintf(` + SELECT s.package_path, s.module_path, s.version, s.imported_by_count, + r.file_path, r.contents, + d.source + FROM search_documents s + LEFT JOIN readmes r USING (unit_id) + INNER JOIN documentation d USING (unit_id) + WHERE (d.goos = 'all' OR d.goos = 'linux') + AND imported_by_count >= %d + `, *minImporters) + f, err := os.Create(filename) + if err != nil { + return err + } + enc := gob.NewEncoder(f) + n := 0 + err = db.RunQuery(ctx, query, func(rows *sql.Rows) error { + var pd PackageDoc + var source []byte + err := rows.Scan(&pd.ImportPath, &pd.ModulePath, &pd.Version, &pd.NumImporters, + &pd.ReadmeFilename, &pd.ReadmeContents, &source) + if err != nil { + return err + } + if err := populateDoc(&pd, source); err != nil { + return err + } + if err := enc.Encode(pd); err != nil { + return err + } + n++ + if n%1000 == 0 { + fmt.Printf("%d\n", n) + } + return nil + }) + if err != nil { + return err + } + fmt.Printf("wrote %d packages.\n", n) + return f.Close() +} + +func populateDoc(pd *PackageDoc, source []byte) error { + gpkg, err := godoc.DecodePackage(source) + if err != nil { + return err + } + innerPath := strings.TrimPrefix(pd.ImportPath, pd.ModulePath+"/") + modInfo := &godoc.ModuleInfo{ModulePath: pd.ModulePath, ResolvedVersion: pd.Version} + dpkg, err := gpkg.DocPackage(innerPath, modInfo) + if err != nil { + return err + } + if strings.TrimSpace(dpkg.Doc) == "" { + return nil + } + pd.PackageDoc = dpkg.Doc + return nil +} + +func read(filename string) error { + f, err := os.Open(filename) + if err != nil { + return err + } + defer f.Close() + dec := gob.NewDecoder(f) + for { + var pd PackageDoc + err := dec.Decode(&pd) + if err == io.EOF { + return nil + } + if err != nil { + return err + } + pd.PackageDoc = trunc(pd.PackageDoc) + fmt.Printf("%s (%s@%s):\n", pd.ImportPath, 
pd.ModulePath, pd.Version) + fmt.Printf(" %d importers\n", pd.NumImporters) + fmt.Printf(" pkg doc: %q\n", pd.PackageDoc) + if pd.ReadmeFilename != nil && pd.ReadmeContents != nil { + *pd.ReadmeContents = trunc(*pd.ReadmeContents) + fmt.Printf(" readme (from %s): %q\n", *pd.ReadmeFilename, *pd.ReadmeContents) + } + } + +} + +func trunc(s string) string { + if *truncate <= 0 { + return s + } + if len(s) < *truncate { + return s + } + return s[:*truncate] +} |
