| // The digraph command performs queries over unlabelled directed graphs |
| // represented in text form. It is intended to integrate nicely with |
| // typical UNIX command pipelines. |
| // |
| // Since directed graphs (import graphs, reference graphs, call graphs, |
| // etc) often arise during software tool development and debugging, this |
| // command is included in the go.tools repository. |
| // |
| // TODO(adonovan): |
| // - support input files other than stdin |
| // - suport alternative formats (AT&T GraphViz, CSV, etc), |
| // a comment syntax, etc. |
| // - allow queries to nest, like Blaze query language. |
| // |
| package main |
| |
| import ( |
| "bufio" |
| "bytes" |
| "errors" |
| "flag" |
| "fmt" |
| "io" |
| "os" |
| "sort" |
| "strconv" |
| "unicode" |
| "unicode/utf8" |
| ) |
| |
| const Usage = `digraph: queries over directed graphs in text form. |
| |
| Graph format: |
| |
| Each line contains zero or more words. Words are separated by |
| unquoted whitespace; words may contain Go-style double-quoted portions, |
| allowing spaces and other characters to be expressed. |
| |
| Each field declares a node, and if there are more than one, |
| an edge from the first to each subsequent one. |
| The graph is provided on the standard input. |
| |
| For instance, the following (acyclic) graph specifies a partial order |
| among the subtasks of getting dressed: |
| |
| % cat clothes.txt |
| socks shoes |
| "boxer shorts" pants |
| pants belt shoes |
| shirt tie sweater |
| sweater jacket |
| hat |
| |
| The line "shirt tie sweater" indicates the two edges shirt -> tie and |
| shirt -> sweater, not shirt -> tie -> sweater. |
| |
| Supported queries: |
| |
| nodes |
| the set of all nodes |
| degree |
| the in-degree and out-degree of each node. |
| preds <label> ... |
| the set of immediate predecessors of the specified nodes |
| succs <label> ... |
| the set of immediate successors of the specified nodes |
| forward <label> ... |
| the set of nodes transitively reachable from the specified nodes |
| reverse <label> ... |
| the set of nodes that transitively reach the specified nodes |
| somepath <label> <label> |
| the list of nodes on some arbitrary path from the first node to the second |
| allpaths <label> <label> |
| the set of nodes on all paths from the first node to the second |
| sccs |
| all strongly connected components (one per line) |
| scc <label> |
| the set of nodes nodes strongly connected to the specified one |
| |
| Example usage: |
| |
| Show the transitive closure of imports of the digraph tool itself: |
| % go list -f '{{.ImportPath}}{{.Imports}}' ... | tr '[]' ' ' | |
| digraph forward golang.org/x/tools/cmd/digraph |
| |
| Show which clothes (see above) must be donned before a jacket: |
| % digraph reverse jacket <clothes.txt |
| |
| ` |
| |
| func main() { |
| flag.Parse() |
| |
| args := flag.Args() |
| if len(args) == 0 { |
| fmt.Println(Usage) |
| return |
| } |
| |
| if err := digraph(args[0], args[1:]); err != nil { |
| fmt.Fprintf(os.Stderr, "Error: %s\n", err) |
| os.Exit(1) |
| } |
| } |
| |
| type nodelist []string |
| |
| func (l nodelist) println(sep string) { |
| for i, label := range l { |
| if i > 0 { |
| fmt.Fprint(stdout, sep) |
| } |
| fmt.Fprint(stdout, label) |
| } |
| fmt.Fprintln(stdout) |
| } |
| |
| type nodeset map[string]bool |
| |
| func (s nodeset) sort() nodelist { |
| labels := make(nodelist, len(s)) |
| var i int |
| for label := range s { |
| labels[i] = label |
| i++ |
| } |
| sort.Strings(labels) |
| return labels |
| } |
| |
| func (s nodeset) addAll(x nodeset) { |
| for label := range x { |
| s[label] = true |
| } |
| } |
| |
| // A graph maps nodes to the non-nil set of their immediate successors. |
| type graph map[string]nodeset |
| |
| func (g graph) addNode(label string) nodeset { |
| edges := g[label] |
| if edges == nil { |
| edges = make(nodeset) |
| g[label] = edges |
| } |
| return edges |
| } |
| |
| func (g graph) addEdges(from string, to ...string) { |
| edges := g.addNode(from) |
| for _, to := range to { |
| g.addNode(to) |
| edges[to] = true |
| } |
| } |
| |
| func (g graph) reachableFrom(roots nodeset) nodeset { |
| seen := make(nodeset) |
| var visit func(label string) |
| visit = func(label string) { |
| if !seen[label] { |
| seen[label] = true |
| for e := range g[label] { |
| visit(e) |
| } |
| } |
| } |
| for root := range roots { |
| visit(root) |
| } |
| return seen |
| } |
| |
| func (g graph) transpose() graph { |
| rev := make(graph) |
| for label, edges := range g { |
| rev.addNode(label) |
| for succ := range edges { |
| rev.addEdges(succ, label) |
| } |
| } |
| return rev |
| } |
| |
| func (g graph) sccs() []nodeset { |
| // Kosaraju's algorithm---Tarjan is overkill here. |
| |
| // Forward pass. |
| S := make(nodelist, 0, len(g)) // postorder stack |
| seen := make(nodeset) |
| var visit func(label string) |
| visit = func(label string) { |
| if !seen[label] { |
| seen[label] = true |
| for e := range g[label] { |
| visit(e) |
| } |
| S = append(S, label) |
| } |
| } |
| for label := range g { |
| visit(label) |
| } |
| |
| // Reverse pass. |
| rev := g.transpose() |
| var scc nodeset |
| seen = make(nodeset) |
| var rvisit func(label string) |
| rvisit = func(label string) { |
| if !seen[label] { |
| seen[label] = true |
| scc[label] = true |
| for e := range rev[label] { |
| rvisit(e) |
| } |
| } |
| } |
| var sccs []nodeset |
| for len(S) > 0 { |
| top := S[len(S)-1] |
| S = S[:len(S)-1] // pop |
| if !seen[top] { |
| scc = make(nodeset) |
| rvisit(top) |
| sccs = append(sccs, scc) |
| } |
| } |
| return sccs |
| } |
| |
| func parse(rd io.Reader) (graph, error) { |
| g := make(graph) |
| |
| var linenum int |
| in := bufio.NewScanner(rd) |
| for in.Scan() { |
| linenum++ |
| // Split into words, honoring double-quotes per Go spec. |
| words, err := split(in.Text()) |
| if err != nil { |
| return nil, fmt.Errorf("at line %d: %v", linenum, err) |
| } |
| if len(words) > 0 { |
| g.addEdges(words[0], words[1:]...) |
| } |
| } |
| if err := in.Err(); err != nil { |
| return nil, err |
| } |
| return g, nil |
| } |
| |
| var stdin io.Reader = os.Stdin |
| var stdout io.Writer = os.Stdout |
| |
| func digraph(cmd string, args []string) error { |
| // Parse the input graph. |
| g, err := parse(stdin) |
| if err != nil { |
| return err |
| } |
| |
| // Parse the command line. |
| switch cmd { |
| case "nodes": |
| if len(args) != 0 { |
| return fmt.Errorf("usage: nodes") |
| } |
| nodes := make(nodeset) |
| for label := range g { |
| nodes[label] = true |
| } |
| nodes.sort().println("\n") |
| |
| case "degree": |
| if len(args) != 0 { |
| return fmt.Errorf("usage: degree") |
| } |
| nodes := make(nodeset) |
| for label := range g { |
| nodes[label] = true |
| } |
| rev := g.transpose() |
| for _, label := range nodes.sort() { |
| fmt.Fprintf(stdout, "%d\t%d\t%s\n", len(rev[label]), len(g[label]), label) |
| } |
| |
| case "succs", "preds": |
| if len(args) == 0 { |
| return fmt.Errorf("usage: %s <label> ...", cmd) |
| } |
| g := g |
| if cmd == "preds" { |
| g = g.transpose() |
| } |
| result := make(nodeset) |
| for _, root := range args { |
| edges := g[root] |
| if edges == nil { |
| return fmt.Errorf("no such node %q", root) |
| } |
| result.addAll(edges) |
| } |
| result.sort().println("\n") |
| |
| case "forward", "reverse": |
| if len(args) == 0 { |
| return fmt.Errorf("usage: %s <label> ...", cmd) |
| } |
| roots := make(nodeset) |
| for _, root := range args { |
| if g[root] == nil { |
| return fmt.Errorf("no such node %q", root) |
| } |
| roots[root] = true |
| } |
| g := g |
| if cmd == "reverse" { |
| g = g.transpose() |
| } |
| g.reachableFrom(roots).sort().println("\n") |
| |
| case "somepath": |
| if len(args) != 2 { |
| return fmt.Errorf("usage: somepath <from> <to>") |
| } |
| from, to := args[0], args[1] |
| if g[from] == nil { |
| return fmt.Errorf("no such 'from' node %q", from) |
| } |
| if g[to] == nil { |
| return fmt.Errorf("no such 'to' node %q", to) |
| } |
| |
| seen := make(nodeset) |
| var visit func(path nodelist, label string) bool |
| visit = func(path nodelist, label string) bool { |
| if !seen[label] { |
| seen[label] = true |
| if label == to { |
| append(path, label).println("\n") |
| return true // unwind |
| } |
| for e := range g[label] { |
| if visit(append(path, label), e) { |
| return true |
| } |
| } |
| } |
| return false |
| } |
| if !visit(make(nodelist, 0, 100), from) { |
| return fmt.Errorf("no path from %q to %q", args[0], args[1]) |
| } |
| |
| case "allpaths": |
| if len(args) != 2 { |
| return fmt.Errorf("usage: allpaths <from> <to>") |
| } |
| from, to := args[0], args[1] |
| if g[from] == nil { |
| return fmt.Errorf("no such 'from' node %q", from) |
| } |
| if g[to] == nil { |
| return fmt.Errorf("no such 'to' node %q", to) |
| } |
| |
| seen := make(nodeset) // value of seen[x] indicates whether x is on some path to 'to' |
| var visit func(label string) bool |
| visit = func(label string) bool { |
| reachesTo, ok := seen[label] |
| if !ok { |
| reachesTo = label == to |
| |
| seen[label] = reachesTo |
| for e := range g[label] { |
| if visit(e) { |
| reachesTo = true |
| } |
| } |
| seen[label] = reachesTo |
| } |
| return reachesTo |
| } |
| if !visit(from) { |
| return fmt.Errorf("no path from %q to %q", from, to) |
| } |
| for label, reachesTo := range seen { |
| if !reachesTo { |
| delete(seen, label) |
| } |
| } |
| seen.sort().println("\n") |
| |
| case "sccs": |
| if len(args) != 0 { |
| return fmt.Errorf("usage: sccs") |
| } |
| for _, scc := range g.sccs() { |
| scc.sort().println(" ") |
| } |
| |
| case "scc": |
| if len(args) != 1 { |
| return fmt.Errorf("usage: scc <label>") |
| } |
| label := args[0] |
| if g[label] == nil { |
| return fmt.Errorf("no such node %q", label) |
| } |
| for _, scc := range g.sccs() { |
| if scc[label] { |
| scc.sort().println("\n") |
| break |
| } |
| } |
| |
| default: |
| return fmt.Errorf("no such command %q", cmd) |
| } |
| |
| return nil |
| } |
| |
| // -- Utilities -------------------------------------------------------- |
| |
| // split splits a line into words, which are generally separated by |
| // spaces, but Go-style double-quoted string literals are also supported. |
| // (This approximates the behaviour of the Bourne shell.) |
| // |
| // `one "two three"` -> ["one" "two three"] |
| // `a"\n"b` -> ["a\nb"] |
| // |
| func split(line string) ([]string, error) { |
| var ( |
| words []string |
| inWord bool |
| current bytes.Buffer |
| ) |
| |
| for len(line) > 0 { |
| r, size := utf8.DecodeRuneInString(line) |
| if unicode.IsSpace(r) { |
| if inWord { |
| words = append(words, current.String()) |
| current.Reset() |
| inWord = false |
| } |
| } else if r == '"' { |
| var ok bool |
| size, ok = quotedLength(line) |
| if !ok { |
| return nil, errors.New("invalid quotation") |
| } |
| s, err := strconv.Unquote(line[:size]) |
| if err != nil { |
| return nil, err |
| } |
| current.WriteString(s) |
| inWord = true |
| } else { |
| current.WriteRune(r) |
| inWord = true |
| } |
| line = line[size:] |
| } |
| if inWord { |
| words = append(words, current.String()) |
| } |
| return words, nil |
| } |
| |
| // quotedLength returns the length in bytes of the prefix of input that |
| // contain a possibly-valid double-quoted Go string literal. |
| // |
| // On success, n is at least two (""); input[:n] may be passed to |
| // strconv.Unquote to interpret its value, and input[n:] contains the |
| // rest of the input. |
| // |
| // On failure, quotedLength returns false, and the entire input can be |
| // passed to strconv.Unquote if an informative error message is desired. |
| // |
| // quotedLength does not and need not detect all errors, such as |
| // invalid hex or octal escape sequences, since it assumes |
| // strconv.Unquote will be applied to the prefix. It guarantees only |
| // that if there is a prefix of input containing a valid string literal, |
| // its length is returned. |
| // |
| // TODO(adonovan): move this into a strconv-like utility package. |
| // |
| func quotedLength(input string) (n int, ok bool) { |
| var offset int |
| |
| // next returns the rune at offset, or -1 on EOF. |
| // offset advances to just after that rune. |
| next := func() rune { |
| if offset < len(input) { |
| r, size := utf8.DecodeRuneInString(input[offset:]) |
| offset += size |
| return r |
| } |
| return -1 |
| } |
| |
| if next() != '"' { |
| return // error: not a quotation |
| } |
| |
| for { |
| r := next() |
| if r == '\n' || r < 0 { |
| return // error: string literal not terminated |
| } |
| if r == '"' { |
| return offset, true // success |
| } |
| if r == '\\' { |
| var skip int |
| switch next() { |
| case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"': |
| skip = 0 |
| case '0', '1', '2', '3', '4', '5', '6', '7': |
| skip = 2 |
| case 'x': |
| skip = 2 |
| case 'u': |
| skip = 4 |
| case 'U': |
| skip = 8 |
| default: |
| return // error: invalid escape |
| } |
| |
| for i := 0; i < skip; i++ { |
| next() |
| } |
| } |
| } |
| } |