| // Copyright 2016 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package main |
| |
| import ( |
| "bytes" |
| "errors" |
| "fmt" |
| "io" |
| "log" |
| "math" |
| "os" |
| "reflect" |
| "regexp" |
| "sort" |
| "strconv" |
| "strings" |
| "time" |
| |
| "rsc.io/pdf" |
| ) |
| |
// listing holds information about one or more parsed manual pages
// concerning a single instruction listing.
// parsePage fills in one listing per page; merge appends the tables and
// compatibility text of follow-on pages to the listing that has the headline.
type listing struct {
	pageNum   int          // page number handed to parsePage
	name      string       // instruction heading; left empty when the page has no headline
	mtables   [][][]string // mnemonic tables (at most one per page)
	enctables [][][]string // encoding tables (at most one per page)
	compat    string       // text gathered by findCompat, concatenated across merged pages
}
| |
// logReaderAt is an io.ReaderAt wrapper that logs each request before
// passing it on to the wrapped reader.
type logReaderAt struct {
	f io.ReaderAt // reader that actually services the request
}

// ReadAt logs the requested length and offset through the standard logger,
// then returns exactly what the wrapped reader returns for the same arguments.
func (l *logReaderAt) ReadAt(x []byte, off int64) (int, error) {
	want := len(x)
	log.Printf("read %d @ %d", want, off)
	n, err := l.f.ReadAt(x, off)
	return n, err
}
| |
const (
	cacheBlockSize = 64 * 1024 // bytes per cached block; a power of two so offsets can be masked
	numCacheBlock  = 16        // number of blocks kept in the cache list
)

// cachedReaderAt wraps an io.ReaderAt with a small cache of block-aligned
// reads of cacheBlockSize bytes, kept in most-recently-used order.
type cachedReaderAt struct {
	r     io.ReaderAt
	cache *cacheBlock // head of the list; the most recently used block comes first
}

// cacheBlock is one entry of the cache list.
type cacheBlock struct {
	next   *cacheBlock
	buf    []byte // bytes read at offset; nil until the block has been filled once
	offset int64  // block-aligned offset of buf within the underlying reader
	err    error  // error returned by the read that filled buf, if any
}

// newCachedReaderAt returns a cachedReaderAt reading from r with
// numCacheBlock empty cache blocks preallocated.
func newCachedReaderAt(r io.ReaderAt) *cachedReaderAt {
	c := &cachedReaderAt{
		r: r,
	}
	for i := 0; i < numCacheBlock; i++ {
		c.cache = &cacheBlock{next: c.cache}
	}
	return c
}

// ReadAt implements io.ReaderAt. Requests of cacheBlockSize bytes or more
// go straight to the underlying reader; smaller requests are assembled from
// cached blocks. If fewer than len(p) bytes are available it returns the
// count copied together with the error recorded for the block that ran out
// (io.EOF at the end of the input), or errShortRead if the underlying reader
// returned a short block without reporting an error.
func (c *cachedReaderAt) ReadAt(p []byte, offset int64) (n int, err error) {
	// Assume large reads indicate a caller that doesn't need caching.
	if len(p) >= cacheBlockSize {
		return c.r.ReadAt(p, offset)
	}

	for n < len(p) {
		o := offset + int64(n)
		f := o & (cacheBlockSize - 1)
		b := c.readBlock(o - f)

		// The block may hold fewer than f bytes (short read at the end of
		// the input); slicing b.buf[f:] in that case would panic.
		copied := 0
		if f < int64(len(b.buf)) {
			copied = copy(p[n:], b.buf[f:])
			n += copied
		}
		if n == len(p) {
			break
		}
		if b.err != nil {
			return n, b.err
		}
		if copied == 0 {
			// No progress and no error: the underlying reader broke the
			// io.ReaderAt contract. Fail instead of looping forever.
			return n, errShortRead
		}
	}
	return n, nil
}

// errShortRead is returned by ReadAt when the underlying reader delivers
// fewer bytes than requested without reporting an error.
var errShortRead = errors.New("short read")

// readBlock returns the cache block for the given block-aligned offset,
// reading it from the underlying reader if it is not cached. It panics if
// offset is not a multiple of cacheBlockSize.
func (c *cachedReaderAt) readBlock(offset int64) *cacheBlock {
	if offset&(cacheBlockSize-1) != 0 {
		panic("misuse of cachedReaderAt.readBlock")
	}

	// Look in cache.
	var b, prev *cacheBlock
	for b = c.cache; ; prev, b = b, b.next {
		if b.buf != nil && b.offset == offset {
			// Move to front.
			if prev != nil {
				prev.next = b.next
				b.next = c.cache
				c.cache = b
			}
			return b
		}
		if b.next == nil {
			break
		}
	}

	// Otherwise b is the least recently used block (the list tail) and
	// prev is its predecessor; reuse b for the new offset.
	if b.buf == nil {
		b.buf = make([]byte, cacheBlockSize)
	}
	b.offset = offset
	n, err := c.r.ReadAt(b.buf[:cacheBlockSize], offset)
	b.buf = b.buf[:n]
	b.err = err
	if n > 0 && prev != nil {
		// Move to front. prev is nil only for a one-element list, in which
		// case b is already the head.
		prev.next = nil
		b.next = c.cache
		c.cache = b
	}
	return b
}
| |
| func pdfOpen(name string) (*pdf.Reader, error) { |
| f, err := os.Open(name) |
| if err != nil { |
| return nil, err |
| } |
| fi, err := f.Stat() |
| if err != nil { |
| f.Close() |
| return nil, err |
| } |
| return pdf.NewReader(newCachedReaderAt(f), fi.Size()) |
| } |
| |
| func parse() []*instruction { |
| var insts []*instruction |
| |
| f, err := pdfOpen(*flagFile) |
| if err != nil { |
| log.Fatal(err) |
| } |
| |
| // Find instruction set reference in outline, to build instruction list. |
| instList := instHeadings(f.Outline()) |
| if len(instList) < 200 { |
| log.Fatalf("only found %d instructions in table of contents", len(instList)) |
| } |
| |
| // Scan document looking for instructions. |
| // Must find exactly the ones in the outline. |
| n := f.NumPage() |
| var current *listing |
| finishInstruction := func() { |
| if current == nil { |
| return |
| } |
| if len(current.mtables) == 0 || len(current.mtables[0]) <= 1 { |
| fmt.Fprintf(os.Stderr, "p.%d: no mnemonics for instruction %q\n", current.pageNum, current.name) |
| } |
| processListing(current, &insts) |
| current = nil |
| } |
| |
| for pageNum := 1; pageNum <= n; pageNum++ { |
| if onlySomePages && !isDebugPage(pageNum) { |
| continue |
| } |
| p := f.Page(pageNum) |
| parsed := parsePage(p, pageNum) |
| if parsed.name != "" { |
| finishInstruction() |
| for j, headline := range instList { |
| if parsed.name == headline { |
| instList[j] = "" |
| current = parsed |
| break |
| } |
| } |
| if current == nil { |
| fmt.Fprintf(os.Stderr, "p.%d: unexpected instruction %q\n", pageNum, parsed.name) |
| } |
| continue |
| } |
| if current != nil { |
| merge(current, parsed) |
| continue |
| } |
| if parsed.mtables != nil { |
| fmt.Fprintf(os.Stderr, "p.%d: unexpected mnemonic table\n", pageNum) |
| } |
| if parsed.enctables != nil { |
| fmt.Fprintf(os.Stderr, "p.%d: unexpected encoding table\n", pageNum) |
| } |
| if parsed.compat != "" { |
| fmt.Fprintf(os.Stderr, "p.%d: unexpected compatibility statement\n", pageNum) |
| } |
| } |
| finishInstruction() |
| |
| if !onlySomePages { |
| for _, headline := range instList { |
| if headline != "" { |
| fmt.Fprintf(os.Stderr, "missing instruction %q\n", headline) |
| } |
| } |
| } |
| |
| return insts |
| } |
| |
| // isDebugPage reports whether the -debugpage flag mentions page n. |
| // The argument is a comma-separated list of pages. |
| // Maybe some day it will support ranges. |
| func isDebugPage(n int) bool { |
| s := *flagDebugPage |
| var k int |
| for i := 0; ; i++ { |
| if i == len(s) || s[i] == ',' { |
| if n == k { |
| return true |
| } |
| k = 0 |
| } |
| if i == len(s) { |
| break |
| } |
| if '0' <= s[i] && s[i] <= '9' { |
| k = k*10 + int(s[i]) - '0' |
| } |
| } |
| return false |
| } |
| |
| // merge merges the content of y into the running collection in x. |
| func merge(x, y *listing) { |
| if y.name != "" { |
| fmt.Fprintf(os.Stderr, "p.%d: merging page incorrectly\n", y.pageNum) |
| return |
| } |
| |
| x.mtables = append(x.mtables, y.mtables...) |
| x.enctables = append(x.enctables, y.enctables...) |
| x.compat += y.compat |
| } |
| |
| // instHeadings returns the list of instruction headings from the table of contents. |
| // When we parse the pages we expect to find every one of these. |
| func instHeadings(outline pdf.Outline) []string { |
| return appendInstHeadings(outline, nil) |
| } |
| |
// instRE matches the titles of outline entries whose direct children are
// taken as instruction headings by appendInstHeadings.
var instRE = regexp.MustCompile(`\d Instructions \([A-Z]-[A-Z]\)|VMX Instructions|Instruction SET Reference|SHA Extensions Reference`)
| |
// The headings are inconsistent about dash and superscript usage. Normalize.
// fixDash is applied both to outline titles (appendInstHeadings) and to page
// headlines (parsePage) so that the two can be compared for equality.
// The pairs are order-sensitive: the longer, more specific spellings are
// listed before the bare dash forms.
var fixDash = strings.NewReplacer(
	"Compute 2 –1", "Compute 2^x-1",
	"Compute 2x-1", "Compute 2^x-1",
	"Compute 2x–1", "Compute 2^x-1",
	"/ FUCOMI", "/FUCOMI",
	"Compute y ∗ log x", "Compute y * log₂x",
	"Compute y * log2x", "Compute y * log₂x",
	"Compute y * log2(x +1)", "Compute y * log₂(x+1)",
	"Compute y ∗ log (x +1)", "Compute y * log₂(x+1)",
	// Em dash, en dash and hyphen, with or without surrounding spaces,
	// all collapse to a bare hyphen.
	" — ", "-",
	"— ", "-",
	" —", "-",
	"—", "-",
	" – ", "-",
	" –", "-",
	"– ", "-",
	"–", "-",
	" - ", "-",
	"- ", "-",
	" -", "-",
)
| |
| func appendInstHeadings(outline pdf.Outline, list []string) []string { |
| if instRE.MatchString(outline.Title) { |
| for _, child := range outline.Child { |
| list = append(list, fixDash.Replace(child.Title)) |
| } |
| } |
| for _, child := range outline.Child { |
| list = appendInstHeadings(child, list) |
| } |
| return list |
| } |
| |
// dateRE matches an English month name followed by a four-digit year starting
// with 19 or 20. parsePage uses it to find the manual's date on page 1.
var dateRE = regexp.MustCompile(`\b(January|February|March|April|May|June|July|August|September|October|November|December) ((19|20)[0-9][0-9])\b`)
| |
| // parsePage parses a single PDF page and returns the content it found. |
| func parsePage(p pdf.Page, pageNum int) *listing { |
| if debugging { |
| fmt.Fprintf(os.Stderr, "DEBUG: parsing page %d\n", pageNum) |
| } |
| |
| parsed := new(listing) |
| parsed.pageNum = pageNum |
| |
| content := p.Content() |
| |
| for i, t := range content.Text { |
| if match(t, "Symbol", 11, "≠") { |
| t.Font = "NeoSansIntel" |
| t.FontSize = 9 |
| content.Text[i] = t |
| } |
| if t.S == "*" || t.S == "**" || t.S == "***" || t.S == "," && t.Font == "Arial" && t.FontSize < 9 || t.S == "1" && t.Font == "Arial" { |
| t.Font = "NeoSansIntel" |
| t.FontSize = 9 |
| if i+1 < len(content.Text) { |
| t.Y = content.Text[i+1].Y |
| } |
| content.Text[i] = t |
| } |
| } |
| |
| text := findWords(content.Text) |
| |
| for i, t := range text { |
| if match(t, "NeoSansIntel", 8, ".WIG") || match(t, "NeoSansIntel", 8, "AVX2") { |
| t.FontSize = 9 |
| text[i] = t |
| } |
| if t.Font == "NeoSansIntel-Medium" { |
| t.Font = "NeoSansIntelMedium" |
| text[i] = t |
| } |
| if t.Font == "NeoSansIntel-Italic" { |
| t.Font = "NeoSansIntel,Italic" |
| text[i] = t |
| } |
| } |
| |
| if debugging { |
| for _, t := range text { |
| fmt.Println(t) |
| } |
| } |
| |
| if pageNum == 1 { |
| var buf bytes.Buffer |
| for _, t := range text { |
| buf.WriteString(t.S + "\n") |
| } |
| all := buf.String() |
| m := regexp.MustCompile(`Order Number: ([\w-\-]+)`).FindStringSubmatch(all) |
| num := "???" |
| if m != nil { |
| num = m[1] |
| } |
| date := dateRE.FindString(all) |
| if date == "" { |
| date = "???" |
| } |
| |
| fmt.Printf("# x86 instruction set description version %s, %s\n", |
| specFormatVersion, time.Now().Format("2006-01-02")) |
| fmt.Printf("# Based on Intel Instruction Set Reference #%s, %s.\n", num, date) |
| fmt.Printf("# https://golang.org/x/arch/x86/x86spec\n") |
| } |
| |
| // Remove text we should ignore. |
| out := text[:0] |
| for _, t := range text { |
| if shouldIgnore(t) { |
| continue |
| } |
| out = append(out, t) |
| } |
| text = out |
| |
| // Page header must say instruction set reference. |
| if len(text) == 0 { |
| return parsed |
| } |
| if (!match(text[0], "NeoSansIntel", 9, "INSTRUCTION") || !match(text[0], "NeoSansIntel", 9, "REFERENCE")) && |
| !match(text[0], "NeoSansIntel", 9, "EXTENSIONS") { |
| return parsed |
| } |
| text = text[1:] |
| |
| enctable := findEncodingTable(text) |
| if enctable != nil { |
| parsed.enctables = append(parsed.enctables, enctable) |
| } |
| |
| parsed.compat = findCompat(text) |
| |
| // Narrow scope for finding mnemonic table. |
| // Must be last, since it trims text. |
| // Next line is headline. Can wrap to multiple lines. |
| if len(text) == 0 || !match(text[0], "NeoSansIntelMedium", 12, "") || !isInstHeadline(text[0].S) { |
| if debugging { |
| fmt.Fprintf(os.Stderr, "non-inst-headline: %v\n", text[0]) |
| } |
| } else { |
| parsed.name = text[0].S |
| text = text[1:] |
| for len(text) > 0 && match(text[0], "NeoSansIntelMedium", 12, "") { |
| parsed.name += " " + text[0].S |
| text = text[1:] |
| } |
| parsed.name = fixDash.Replace(parsed.name) |
| } |
| |
| // Table follows; heading is NeoSansIntelMedium and rows are NeoSansIntel. |
| i := 0 |
| for i < len(text) && match(text[i], "NeoSansIntelMedium", 9, "") { |
| i++ |
| } |
| for i < len(text) && match(text[i], "NeoSansIntel", 9, "") && text[i].S != "NOTES:" { |
| i++ |
| } |
| |
| mtable := findMnemonicTable(text[:i]) |
| if mtable != nil { |
| parsed.mtables = append(parsed.mtables, mtable) |
| } |
| |
| return parsed |
| } |
| |
| func match(t pdf.Text, font string, size float64, substr string) bool { |
| return t.Font == font && math.Abs(t.FontSize-size) < 0.1 && strings.Contains(t.S, substr) |
| } |
| |
| func shouldIgnore(t pdf.Text) bool { |
| // Ignore footnote stars, which are in Arial. |
| // Also, the page describing MOVS has a tiny 2pt Arial backslash. |
| if (t.S == "*" || t.S == "\\") && strings.HasPrefix(t.Font, "Arial") { |
| return true |
| } |
| |
| // Ignore superscript numbers, superscript ST(0), and superscript x. |
| if len(t.S) == 1 && '1' <= t.S[0] && t.S[0] <= '9' || t.S == "ST(0)" || t.S == "x" { |
| if match(t, "NeoSansIntel", 7.2, "") || match(t, "NeoSansIntel", 5.6, "") || match(t, "NeoSansIntelMedium", 8, "") || match(t, "NeoSansIntelMedium", 9.6, "") { |
| return true |
| } |
| } |
| |
| return false |
| } |
| |
// isInstHeadline reports whether s looks like an instruction headline: it
// contains an em dash, a hyphen surrounded by spaces, or the one headline
// spelled "PTEST- Logical Compare".
func isInstHeadline(s string) bool {
	for _, marker := range [...]string{"—", " - ", "PTEST- Logical Compare"} {
		if strings.Contains(s, marker) {
			return true
		}
	}
	return false
}
| |
| func findWords(chars []pdf.Text) (words []pdf.Text) { |
| // Sort by Y coordinate and normalize. |
| const nudge = 1 |
| sort.Sort(pdf.TextVertical(chars)) |
| old := -100000.0 |
| for i, c := range chars { |
| if c.Y != old && math.Abs(old-c.Y) < nudge { |
| chars[i].Y = old |
| } else { |
| old = c.Y |
| } |
| } |
| |
| // Sort by Y coordinate, breaking ties with X. |
| // This will bring letters in a single word together. |
| sort.Sort(pdf.TextVertical(chars)) |
| |
| // Loop over chars. |
| for i := 0; i < len(chars); { |
| // Find all chars on line. |
| j := i + 1 |
| for j < len(chars) && chars[j].Y == chars[i].Y { |
| j++ |
| } |
| var end float64 |
| // Split line into words (really, phrases). |
| for k := i; k < j; { |
| ck := &chars[k] |
| s := ck.S |
| end = ck.X + ck.W |
| charSpace := ck.FontSize / 6 |
| wordSpace := ck.FontSize * 2 / 3 |
| l := k + 1 |
| for l < j { |
| // Grow word. |
| cl := &chars[l] |
| if sameFont(cl.Font, ck.Font) && cl.FontSize == ck.FontSize && cl.X <= end+charSpace { |
| s += cl.S |
| end = cl.X + cl.W |
| l++ |
| continue |
| } |
| // Add space to phrase before next word. |
| if sameFont(cl.Font, ck.Font) && cl.FontSize == ck.FontSize && cl.X <= end+wordSpace { |
| s += " " + cl.S |
| end = cl.X + cl.W |
| l++ |
| continue |
| } |
| break |
| } |
| f := ck.Font |
| f = strings.TrimSuffix(f, ",Italic") |
| f = strings.TrimSuffix(f, "-Italic") |
| words = append(words, pdf.Text{f, ck.FontSize, ck.X, ck.Y, end, s}) |
| k = l |
| } |
| i = j |
| } |
| |
| return words |
| } |
| |
// sameFont reports whether text in fonts f1 and f2 may be joined into a
// single phrase. Two fonts are compatible when their names are equal after
// removing a trailing ",Italic" or "-Italic", or when either of them is
// "Symbol" or "TimesNewRoman" (those two join with anything).
func sameFont(f1, f2 string) bool {
	f1 = strings.TrimSuffix(f1, ",Italic")
	f1 = strings.TrimSuffix(f1, "-Italic")
	// Normalize f2 from f2 itself; deriving it from f1 would make every
	// pair of fonts compare equal.
	f2 = strings.TrimSuffix(f2, ",Italic")
	f2 = strings.TrimSuffix(f2, "-Italic")
	return f1 == f2 || f1 == "Symbol" || f2 == "Symbol" || f1 == "TimesNewRoman" || f2 == "TimesNewRoman"
}
| |
| func findMnemonicTable(text []pdf.Text) [][]string { |
| sort.Sort(pdf.TextHorizontal(text)) |
| |
| const nudge = 1 |
| |
| old := -100000.0 |
| var col []float64 |
| for i, t := range text { |
| if t.Font != "NeoSansIntelMedium" { // only headings count |
| continue |
| } |
| if t.X != old && math.Abs(old-t.X) < nudge { |
| text[i].X = old |
| } else if t.X != old { |
| old = t.X |
| col = append(col, old) |
| } |
| } |
| sort.Sort(pdf.TextVertical(text)) |
| |
| if len(col) == 0 { |
| return nil |
| } |
| |
| y := -100000.0 |
| var table [][]string |
| var line []string |
| bold := -1 |
| for _, t := range text { |
| if t.Y != y { |
| table = append(table, make([]string, len(col))) |
| line = table[len(table)-1] |
| y = t.Y |
| if t.Font == "NeoSansIntelMedium" { |
| bold = len(table) - 1 |
| } |
| } |
| i := 0 |
| for i+1 < len(col) && col[i+1] <= t.X+nudge { |
| i++ |
| } |
| if line[i] != "" { |
| line[i] += " " |
| } |
| line[i] += t.S |
| } |
| |
| var mtable [][]string |
| for i, t := range table { |
| if 0 < i && i <= bold || bold < i && halfMissing(t) { |
| // merge with earlier line |
| last := mtable[len(mtable)-1] |
| for j, s := range t { |
| if s != "" { |
| last[j] += "\n" + s |
| } |
| } |
| } else { |
| mtable = append(mtable, t) |
| } |
| } |
| |
| if bold >= 0 { |
| heading := mtable[0] |
| for i, x := range heading { |
| heading[i] = fixHeading.Replace(x) |
| } |
| } |
| |
| return mtable |
| } |
| |
// fixHeading rewrites the many line-wrapped and otherwise inconsistent
// spellings of mnemonic-table column headings into the canonical names that
// processListing switches on. findMnemonicTable applies it to every cell of
// the heading row, where "\n" marks the point at which two raw rows were
// folded together.
var fixHeading = strings.NewReplacer(
	// Validity columns.
	"64/32-\nbit\nMode", "64/32-Bit Mode",
	"64/32-\nbit Mode", "64/32-Bit Mode",
	"64/32-bit\nMode", "64/32-Bit Mode",
	"64/3\n2-bit\nMode", "64/32-Bit Mode",
	"64/32 bit\nMode\nSupport", "64/32-Bit Mode",
	"64/32bit\nMode\nSupport", "64/32-Bit Mode",
	"64/32\n-bit\nMode", "64/32-Bit Mode",
	"64/32\nbit Mode\nSupport", "64/32-Bit Mode",
	"64-Bit\nMode", "64-Bit Mode",
	"64-bit\nMode", "64-Bit Mode",

	// Opcode, instruction and operand-encoding columns.
	"Op/ En", "Op/En",
	"Op/\nEn", "Op/En",
	"Op/\nEN", "Op/En",
	"Op /\nEn", "Op/En",
	"Opcode***", "Opcode",
	"Opcode**", "Opcode",
	"Opcode*", "Opcode",
	"/\nInstruction", "/Instruction",

	// CPUID feature flag column.
	"CPUID Fea-\nture Flag", "CPUID Feature Flag",
	"CPUID\nFeature\nFlag", "CPUID Feature Flag",
	"CPUID\nFeature Flag", "CPUID Feature Flag",
	"CPUIDFeature\nFlag", "CPUID Feature Flag",

	// Compatibility/legacy mode column.
	"Compat/\nLeg Mode*", "Compat/Leg Mode",
	"Compat/\nLeg Mode", "Compat/Leg Mode",
	"Compat/ *\nLeg Mode", "Compat/Leg Mode",
)
| |
// halfMissing reports whether the number of empty strings in x is at least
// len(x)/2 (integer division). It is therefore true for empty and
// single-element slices.
func halfMissing(x []string) bool {
	filled := 0
	for i := range x {
		if x[i] != "" {
			filled++
		}
	}
	empty := len(x) - filled
	return empty >= len(x)/2
}
| |
| func findEncodingTable(text []pdf.Text) [][]string { |
| // Look for operand encoding table. |
| sort.Sort(pdf.TextVertical(text)) |
| var col []float64 |
| sawTitle := false |
| |
| center := func(t pdf.Text) float64 { |
| return t.X + t.W/2 |
| } |
| |
| start := 0 |
| end := len(text) |
| for i, t := range text { |
| if match(t, "NeoSansIntelMedium", 10, "Instruction Operand Encoding") { |
| sawTitle = true |
| start = i + 1 |
| continue |
| } |
| if !sawTitle { |
| continue |
| } |
| if match(t, "NeoSansIntel", 9, "Op/En") || match(t, "NeoSansIntel", 9, "Operand") { |
| if debugging { |
| fmt.Printf("column %d at %.2f: %v\n", len(col), center(t), t) |
| } |
| col = append(col, center(t)) |
| } |
| if match(t, "NeoSansIntelMedium", 10, "Description") { |
| end = i |
| break |
| } |
| } |
| text = text[start:end] |
| |
| if len(col) == 0 { |
| return nil |
| } |
| |
| const nudge = 20 |
| |
| y := -100000.0 |
| var table [][]string |
| var line []string |
| for _, t := range text { |
| if t.Y != y { |
| table = append(table, make([]string, len(col))) |
| line = table[len(table)-1] |
| y = t.Y |
| } |
| i := 0 |
| x := center(t) |
| for i+1 < len(col) && col[i+1] <= x+nudge { |
| i++ |
| } |
| if debugging { |
| fmt.Printf("text at %.2f: %v => %d\n", x, t, i) |
| } |
| if line[i] != "" { |
| line[i] += " " |
| } |
| line[i] += t.S |
| } |
| |
| out := table[:0] |
| for _, line := range table { |
| if strings.HasPrefix(line[len(line)-1], "Vol. 2") { // page footer |
| continue |
| } |
| if line[0] == "" && len(out) > 0 { |
| last := out[len(out)-1] |
| for i, col := range line { |
| if col != "" { |
| last[i] += " " + col |
| } |
| } |
| continue |
| } |
| out = append(out, line) |
| } |
| table = out |
| |
| return table |
| } |
| |
| func findCompat(text []pdf.Text) string { |
| sort.Sort(pdf.TextVertical(text)) |
| |
| inCompat := false |
| out := "" |
| for _, t := range text { |
| if match(t, "NeoSansIntelMedium", 10, "") { |
| inCompat = strings.Contains(t.S, "Architecture Compatibility") |
| if inCompat { |
| out += t.S + "\n" |
| } |
| } |
| if inCompat && match(t, "Verdana", 9, "") || strings.Contains(t.S, "were introduced") { |
| out += t.S + "\n" |
| } |
| } |
| return out |
| } |
| |
// processListing turns the tables collected for one instruction listing into
// instruction values and appends them to *insts.
//
// Each row of each mnemonic table (other than the heading row and repeats of
// it) yields one instruction, whose fields are filled according to the
// column headings. Operand encodings are looked up by the row's Op/En value
// in the listing's encoding tables. Known typos in the opcode, syntax and
// CPUID columns are then patched, and the instruction is appended unless its
// syntax is in instBlacklist.
//
// If a table has an unknown heading or a cell that cannot be interpreted,
// processing of the listing stops: a diagnostic and a dump of the listing's
// mnemonic tables go to standard error. Instructions appended before the
// failure remain in *insts.
func processListing(p *listing, insts *[]*instruction) {
	if debugging {
		for _, table := range p.mtables {
			fmt.Printf("table:\n")
			for _, row := range table {
				fmt.Printf("%q\n", row)
			}
		}
		fmt.Printf("enctable:\n")
		for _, table := range p.enctables {
			for _, row := range table {
				fmt.Printf("%q\n", row)
			}
		}
		fmt.Printf("compat:\n%s", p.compat)
	}

	// With -compat, echo the compatibility statement as a comment block.
	if *flagCompat && p.compat != "" {
		fmt.Printf("# p.%d: %s\n#\t%s\n", p.pageNum, p.name, strings.Replace(p.compat, "\n", "\n#\t", -1))
	}

	// encs maps the first cell of each encoding-table row (the heading row
	// is skipped) to the row's remaining cells, after trailing "NA", empty
	// and " source" cells have been dropped.
	encs := make(map[string][]string)
	for _, table := range p.enctables {
		for _, row := range table[1:] {
			for len(row) > 1 && (row[len(row)-1] == "NA" || row[len(row)-1] == "" || row[len(row)-1] == " source") {
				row = row[:len(row)-1]
			}
			encs[row[0]] = row[1:]
		}
	}

	var wrong string // diagnostic reported at BadTable
	for _, table := range p.mtables {
		heading := table[0]
		for _, row := range table[1:] {
			// Skip rows that merely repeat the heading.
			if row[0] == heading[0] && reflect.DeepEqual(row, heading) {
				continue
			}
			// This CMOVG row has its two validity cells fused into one
			// ("V/N.E." followed by "NA"); split them.
			if len(row) >= 5 && row[1] == "CMOVG r64, r/m64" && row[3] == "V/N.E." && row[4] == "NA" {
				row[3] = "V"
				row[4] = "N.E."
			}
			inst := new(instruction)
			inst.page = p.pageNum
			inst.compat = strings.Join(strings.Fields(p.compat), " ")
			for i, hdr := range heading {
				// x is the cell with line breaks flattened to spaces.
				x := row[i]
				x = strings.Replace(x, "\n", " ", -1)
				switch strings.TrimSpace(hdr) {
				default:
					wrong = "unexpected header: " + strconv.Quote(hdr)
					goto BadTable
				case "Opcode/Instruction":
					// Combined column: the opcode is everything before the
					// first line break and the syntax everything after it,
					// so start over from the raw cell and first undo line
					// breaks that do not separate the two.
					x = row[i]
					if strings.HasPrefix(x, "\nVEX") {
						x = x[1:]
						row[i] = x
					}
					if strings.Contains(x, "\n/r ") {
						x = strings.Replace(x, "\n/r ", " /r ", -1)
						row[i] = x
					}
					if strings.Contains(x, ",\nimm") {
						x = strings.Replace(x, ",\nimm", ", imm", -1)
						row[i] = x
					}
					if strings.Count(x, "\n") < 1 {
						wrong = "bad Opcode/Instruction pairing: " + strconv.Quote(x)
						goto BadTable
					}
					i := strings.Index(x, "\n")
					inst.opcode = x[:i]
					inst.syntax = strings.Replace(x[i+1:], "\n", " ", -1)

				case "Opcode":
					inst.opcode = x

				case "Instruction":
					inst.syntax = x

				case "Op/En":
					inst.args = encs[x]
					// If the name is unknown but the table has exactly one
					// encoding, named "A", use that one.
					if inst.args == nil && len(encs) == 1 && encs["A"] != nil {
						inst.args = encs["A"]
					}
					// In the December 2015 manual, PREFETCHW says
					// encoding A but the table gives encoding M.
					if inst.args == nil && inst.syntax == "PREFETCHW m8" && x == "A" && len(encs) == 1 && encs["M"] != nil {
						inst.args = encs["M"]
					}

				case "64-Bit Mode":
					x, ok := parseMode(x)
					if !ok {
						wrong = "unexpected value for 64-Bit Mode column: " + x
						goto BadTable
					}
					inst.valid64 = x

				case "Compat/Leg Mode":
					x, ok := parseMode(x)
					if !ok {
						wrong = "unexpected value for Compat/Leg Mode column: " + x
						goto BadTable
					}
					inst.valid32 = x

				case "64/32-Bit Mode":
					// Combined column of the form "<64-bit>/<32-bit>".
					i := strings.Index(x, "/")
					if i < 0 {
						wrong = "unexpected value for 64/32-Bit Mode column: " + x
						goto BadTable
					}
					x1, ok1 := parseMode(x[:i])
					x2, ok2 := parseMode(x[i+1:])
					if !ok1 || !ok2 {
						wrong = "unexpected value for 64/32-Bit Mode column: " + x
						goto BadTable
					}
					inst.valid64 = x1
					inst.valid32 = x2

				case "CPUID Feature Flag":
					inst.cpuid = x

				case "Description":
					if inst.desc != "" {
						inst.desc += " "
					}
					inst.desc += x
				}
			}

			// Fixup various typos or bugs in opcode descriptions.
			// The replacements below are applied in sequence, and later
			// ones see the output of earlier ones, so their order matters.
			if inst.opcode == "VEX.128.66.0F.W0 6E /" {
				inst.opcode += "r"
			}
			fix := func(old, new string) {
				inst.opcode = strings.Replace(inst.opcode, old, new, -1)
			}
			fix(" imm8", " ib")
			fix("REX.w", "REX.W")
			fix("REX.W+", "REX.W +")
			fix(" 0f ", " 0F ")
			fix(". 0F38", ".0F38")
			fix("0F .WIG", "0F.WIG")
			fix("0F38 .WIG", "0F38.WIG")
			fix("NDS .LZ", "NDS.LZ")
			fix("58+ r", "58+r")
			fix("B0+ ", "B0+")
			fix("B8+ ", "B8+")
			fix("40+ ", "40+")
			fix("*", "")
			fix(",", " ")
			fix("/", " /")
			fix("REX.W +", "REX.W")
			fix("REX +", "REX")
			fix("REX 0F BE", "REX.W 0F BE")
			fix("REX 0F B2", "REX.W 0F B2")
			fix("REX 0F B4", "REX.W 0F B4")
			fix("REX 0F B5", "REX.W 0F B5")
			fix("0F38.0", "0F38.W0")
			fix(".660F.", ".66.0F.")
			fix("VEX128", "VEX.128")
			fix("0F3A.W0.1D", "0F3A.W0 1D")

			// Collapse runs of white space to single spaces.
			inst.opcode = strings.Join(strings.Fields(inst.opcode), " ")

			// Same treatment for the assembly syntax.
			fix = func(old, new string) {
				inst.syntax = strings.Replace(inst.syntax, old, new, -1)
			}
			fix("xmm1 xmm2", "xmm1, xmm2")
			fix("r16/m16", "r/m16")
			fix("r32/m161", "r32/m16") // really r32/m16¹ (footnote)
			fix("r32/m32", "r/m32")
			fix("r64/m64", "r/m64")
			fix("\u2013", "-")
			fix("mm3 /m", "mm3/m")
			fix("mm3/.m", "mm3/m")
			// Round-trip through split/join to normalize operand spacing
			// and strip trailing footnote stars from the operands.
			inst.syntax = joinSyntax(splitSyntax(inst.syntax))

			// And for the CPUID feature flag.
			fix = func(old, new string) {
				inst.cpuid = strings.Replace(inst.cpuid, old, new, -1)
			}
			fix("PCLMUL- QDQ", "PCLMULQDQ")
			fix("PCL- MULQDQ", "PCLMULQDQ")
			fix("Both PCLMULQDQ and AVX flags", "PCLMULQDQ+AVX")

			if !instBlacklist[inst.syntax] {
				*insts = append(*insts, inst)
			}
		}
	}
	return

	// BadTable reports the problem recorded in wrong and dumps all of the
	// listing's mnemonic tables to help diagnose it.
BadTable:
	fmt.Fprintf(os.Stderr, "p.%d: reading %v: %v\n", p.pageNum, p.name, wrong)
	for _, table := range p.mtables {
		for _, t := range table {
			fmt.Fprintf(os.Stderr, "\t%q\n", t)
		}
	}
	fmt.Fprintf(os.Stderr, "\n")
}
| |
// parseMode canonicalizes one cell of a validity column. After trimming
// surrounding white space, the known spellings are mapped to "I", "V",
// "N.E.", "N.P.", "N.S." or "N.I." and the result is returned with true.
// Anything else returns s unchanged (untrimmed) and false.
func parseMode(s string) (string, bool) {
	spellings := []struct {
		canon string
		forms []string
	}{
		{"I", []string{"Invalid", "Invalid*", "Inv.", "I", "i"}},
		{"V", []string{"Valid", "Valid*", "V"}},
		{"N.E.", []string{"N.E.", "NE", "N. E."}},
		{"N.P.", []string{"N.P.", "N. P."}},
		{"N.S.", []string{"N.S.", "N. S."}},
		{"N.I.", []string{"N.I.", "N. I."}},
	}

	trimmed := strings.TrimSpace(s)
	for _, sp := range spellings {
		for _, form := range sp.forms {
			if trimmed == form {
				return sp.canon, true
			}
		}
	}
	return s, false
}
| |
// splitSyntax splits an assembly syntax string into its mnemonic and its
// operands. The mnemonic is everything before the first space; the rest is
// split at commas, and each operand has surrounding white space and then any
// trailing '*' characters removed. A string without a space is returned
// whole as op, with nil args.
func splitSyntax(syntax string) (op string, args []string) {
	sp := strings.IndexByte(syntax, ' ')
	if sp == -1 {
		return syntax, nil
	}
	operands := strings.Split(syntax[sp+1:], ",")
	for k := range operands {
		operands[k] = strings.TrimRight(strings.TrimSpace(operands[k]), "*")
	}
	return syntax[:sp], operands
}
| |
// joinSyntax is the inverse of splitSyntax: it returns op followed by a
// space and the operands separated by ", ", or just op when there are no
// operands.
func joinSyntax(op string, args []string) string {
	if len(args) > 0 {
		return op + " " + strings.Join(args, ", ")
	}
	return op
}