-rw-r--r--  src/index/suffixarray/gen64.go              31
-rw-r--r--  src/index/suffixarray/qsufsort.go           66
-rw-r--r--  src/index/suffixarray/qsufsort64.go        173
-rw-r--r--  src/index/suffixarray/suffixarray.go       132
-rw-r--r--  src/index/suffixarray/suffixarray_test.go  207
5 files changed, 525 insertions(+), 84 deletions(-)
diff --git a/src/index/suffixarray/gen64.go b/src/index/suffixarray/gen64.go
new file mode 100644
index 0000000000..4f0e35e227
--- /dev/null
+++ b/src/index/suffixarray/gen64.go
@@ -0,0 +1,31 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+// Gen64 generates qsufsort64.go from qsufsort.go by s/32/64/g.
+package main
+
+import (
+ "bytes"
+ "io/ioutil"
+ "log"
+)
+
+func main() {
+ log.SetPrefix("gen64: ")
+ log.SetFlags(0)
+
+ data, err := ioutil.ReadFile("qsufsort.go")
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ data = bytes.Replace(data, []byte("\n\n"), []byte("\n\n// Code generated by gen64.go; DO NOT EDIT.\n//go:generate go run gen64.go\n\n"), 1)
+ data = bytes.Replace(data, []byte("32"), []byte("64"), -1)
+
+ if err := ioutil.WriteFile("qsufsort64.go", data, 0666); err != nil {
+ log.Fatal(err)
+ }
+}
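
Note: the generator depends on qsufsort.go being written so that a blind s/32/64/g is safe — every literal "32" in that file belongs to an identifier or type with a 64-bit counterpart. A minimal standalone sketch of the same two bytes.Replace steps (the input literal below is hypothetical; the real input is qsufsort.go):

	package main

	import (
		"bytes"
		"fmt"
	)

	func main() {
		src := []byte("// Copyright ...\n\nfunc qsufsort32(data []byte) []int32 { /* ... */ }\n")
		// The first blank line follows the copyright header, so the
		// "DO NOT EDIT" banner is spliced in right after it.
		src = bytes.Replace(src, []byte("\n\n"),
			[]byte("\n\n// Code generated by gen64.go; DO NOT EDIT.\n//go:generate go run gen64.go\n\n"), 1)
		// Then every "32" becomes "64": qsufsort32 -> qsufsort64, int32 -> int64, etc.
		src = bytes.Replace(src, []byte("32"), []byte("64"), -1)
		fmt.Printf("%s", src)
	}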
diff --git a/src/index/suffixarray/qsufsort.go b/src/index/suffixarray/qsufsort.go
index 9c36a98f82..71aae02c0a 100644
--- a/src/index/suffixarray/qsufsort.go
+++ b/src/index/suffixarray/qsufsort.go
@@ -24,26 +24,28 @@
package suffixarray
-import "sort"
+import (
+ "sort"
+)
-func qsufsort(data []byte) []int {
+func qsufsort32(data []byte) []int32 {
// initial sorting by first byte of suffix
- sa := sortedByFirstByte(data)
+ sa := sortedByFirstByte32(data)
if len(sa) < 2 {
return sa
}
// initialize the group lookup table
// this becomes the inverse of the suffix array when all groups are sorted
- inv := initGroups(sa, data)
+ inv := initGroups32(sa, data)
// the index starts 1-ordered
- sufSortable := &suffixSortable{sa: sa, inv: inv, h: 1}
+ sufSortable := &suffixSortable32{sa: sa, inv: inv, h: 1}
- for sa[0] > -len(sa) { // until all suffixes are one big sorted group
+ for sa[0] > -int32(len(sa)) { // until all suffixes are one big sorted group
// The suffixes are h-ordered, make them 2*h-ordered
- pi := 0 // pi is first position of first group
- sl := 0 // sl is negated length of sorted groups
- for pi < len(sa) {
+ pi := int32(0) // pi is first position of first group
+ sl := int32(0) // sl is negated length of sorted groups
+ for pi < int32(len(sa)) {
if s := sa[pi]; s < 0 { // if pi starts sorted group
pi -= s // skip over sorted group
sl += s // add negated length to sl
@@ -67,12 +69,12 @@ func qsufsort(data []byte) []int {
}
for i := range sa { // reconstruct suffix array from inverse
- sa[inv[i]] = i
+ sa[inv[i]] = int32(i)
}
return sa
}
-func sortedByFirstByte(data []byte) []int {
+func sortedByFirstByte32(data []byte) []int32 {
// total byte counts
var count [256]int
for _, b := range data {
@@ -84,20 +86,20 @@ func sortedByFirstByte(data []byte) []int {
count[b], sum = sum, count[b]+sum
}
// iterate through bytes, placing index into the correct spot in sa
- sa := make([]int, len(data))
+ sa := make([]int32, len(data))
for i, b := range data {
- sa[count[b]] = i
+ sa[count[b]] = int32(i)
count[b]++
}
return sa
}
-func initGroups(sa []int, data []byte) []int {
+func initGroups32(sa []int32, data []byte) []int32 {
// label contiguous same-letter groups with the same group number
- inv := make([]int, len(data))
- prevGroup := len(sa) - 1
+ inv := make([]int32, len(data))
+ prevGroup := int32(len(sa)) - 1
groupByte := data[sa[prevGroup]]
- for i := len(sa) - 1; i >= 0; i-- {
+ for i := int32(len(sa)) - 1; i >= 0; i-- {
if b := data[sa[i]]; b < groupByte {
if prevGroup == i+1 {
sa[i+1] = -1
@@ -114,13 +116,13 @@ func initGroups(sa []int, data []byte) []int {
// This is necessary to ensure the suffix "a" is before "aba"
// when using a potentially unstable sort.
lastByte := data[len(data)-1]
- s := -1
+ s := int32(-1)
for i := range sa {
if sa[i] >= 0 {
if data[sa[i]] == lastByte && s == -1 {
- s = i
+ s = int32(i)
}
- if sa[i] == len(sa)-1 {
+ if sa[i] == int32(len(sa))-1 {
sa[i], sa[s] = sa[s], sa[i]
inv[sa[s]] = s
sa[s] = -1 // mark it as an isolated sorted group
@@ -131,31 +133,31 @@ func initGroups(sa []int, data []byte) []int {
return inv
}
-type suffixSortable struct {
- sa []int
- inv []int
- h int
- buf []int // common scratch space
+type suffixSortable32 struct {
+ sa []int32
+ inv []int32
+ h int32
+ buf []int32 // common scratch space
}
-func (x *suffixSortable) Len() int { return len(x.sa) }
-func (x *suffixSortable) Less(i, j int) bool { return x.inv[x.sa[i]+x.h] < x.inv[x.sa[j]+x.h] }
-func (x *suffixSortable) Swap(i, j int) { x.sa[i], x.sa[j] = x.sa[j], x.sa[i] }
+func (x *suffixSortable32) Len() int { return len(x.sa) }
+func (x *suffixSortable32) Less(i, j int) bool { return x.inv[x.sa[i]+x.h] < x.inv[x.sa[j]+x.h] }
+func (x *suffixSortable32) Swap(i, j int) { x.sa[i], x.sa[j] = x.sa[j], x.sa[i] }
-func (x *suffixSortable) updateGroups(offset int) {
+func (x *suffixSortable32) updateGroups(offset int32) {
bounds := x.buf[0:0]
group := x.inv[x.sa[0]+x.h]
for i := 1; i < len(x.sa); i++ {
if g := x.inv[x.sa[i]+x.h]; g > group {
- bounds = append(bounds, i)
+ bounds = append(bounds, int32(i))
group = g
}
}
- bounds = append(bounds, len(x.sa))
+ bounds = append(bounds, int32(len(x.sa)))
x.buf = bounds
// update the group numberings after all new groups are determined
- prev := 0
+ prev := int32(0)
for _, b := range bounds {
for i := prev; i < b; i++ {
x.inv[x.sa[i]] = offset + b - 1
diff --git a/src/index/suffixarray/qsufsort64.go b/src/index/suffixarray/qsufsort64.go
new file mode 100644
index 0000000000..907d6e3726
--- /dev/null
+++ b/src/index/suffixarray/qsufsort64.go
@@ -0,0 +1,173 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Code generated by gen64.go; DO NOT EDIT.
+//go:generate go run gen64.go
+
+// This algorithm is based on "Faster Suffix Sorting"
+// by N. Jesper Larsson and Kunihiko Sadakane
+// paper: http://www.larsson.dogma.net/ssrev-tr.pdf
+// code: http://www.larsson.dogma.net/qsufsort.c
+
+// This algorithm computes the suffix array sa by computing its inverse.
+// Consecutive groups of suffixes in sa are labeled as sorted groups or
+// unsorted groups. For a given pass of the sorter, all suffixes are ordered
+// up to their first h characters, and sa is h-ordered. Suffixes in their
+// final positions and unambiguously sorted in h-order are in a sorted group.
+// Consecutive groups of suffixes with identical first h characters are an
+// unsorted group. In each pass of the algorithm, unsorted groups are sorted
+// according to the group number of their following suffix.
+
+// In the implementation, if sa[i] is negative, it indicates that i is
+// the first element of a sorted group of length -sa[i], and can be skipped.
+// An unsorted group sa[i:k] is given the group number of the index of its
+// last element, k-1. The group numbers are stored in the inverse slice (inv),
+// and when all groups are sorted, this slice is the inverse suffix array.
+
+package suffixarray
+
+import (
+ "sort"
+)
+
+func qsufsort64(data []byte) []int64 {
+ // initial sorting by first byte of suffix
+ sa := sortedByFirstByte64(data)
+ if len(sa) < 2 {
+ return sa
+ }
+ // initialize the group lookup table
+ // this becomes the inverse of the suffix array when all groups are sorted
+ inv := initGroups64(sa, data)
+
+ // the index starts 1-ordered
+ sufSortable := &suffixSortable64{sa: sa, inv: inv, h: 1}
+
+ for sa[0] > -int64(len(sa)) { // until all suffixes are one big sorted group
+ // The suffixes are h-ordered, make them 2*h-ordered
+ pi := int64(0) // pi is first position of first group
+ sl := int64(0) // sl is negated length of sorted groups
+ for pi < int64(len(sa)) {
+ if s := sa[pi]; s < 0 { // if pi starts sorted group
+ pi -= s // skip over sorted group
+ sl += s // add negated length to sl
+ } else { // if pi starts unsorted group
+ if sl != 0 {
+ sa[pi+sl] = sl // combine sorted groups before pi
+ sl = 0
+ }
+ pk := inv[s] + 1 // pk-1 is last position of unsorted group
+ sufSortable.sa = sa[pi:pk]
+ sort.Sort(sufSortable)
+ sufSortable.updateGroups(pi)
+ pi = pk // next group
+ }
+ }
+ if sl != 0 { // if the array ends with a sorted group
+ sa[pi+sl] = sl // combine sorted groups at end of sa
+ }
+
+ sufSortable.h *= 2 // double sorted depth
+ }
+
+ for i := range sa { // reconstruct suffix array from inverse
+ sa[inv[i]] = int64(i)
+ }
+ return sa
+}
+
+func sortedByFirstByte64(data []byte) []int64 {
+ // total byte counts
+ var count [256]int
+ for _, b := range data {
+ count[b]++
+ }
+ // make count[b] equal index of first occurrence of b in sorted array
+ sum := 0
+ for b := range count {
+ count[b], sum = sum, count[b]+sum
+ }
+ // iterate through bytes, placing index into the correct spot in sa
+ sa := make([]int64, len(data))
+ for i, b := range data {
+ sa[count[b]] = int64(i)
+ count[b]++
+ }
+ return sa
+}
+
+func initGroups64(sa []int64, data []byte) []int64 {
+ // label contiguous same-letter groups with the same group number
+ inv := make([]int64, len(data))
+ prevGroup := int64(len(sa)) - 1
+ groupByte := data[sa[prevGroup]]
+ for i := int64(len(sa)) - 1; i >= 0; i-- {
+ if b := data[sa[i]]; b < groupByte {
+ if prevGroup == i+1 {
+ sa[i+1] = -1
+ }
+ groupByte = b
+ prevGroup = i
+ }
+ inv[sa[i]] = prevGroup
+ if prevGroup == 0 {
+ sa[0] = -1
+ }
+ }
+ // Separate out the final suffix to the start of its group.
+ // This is necessary to ensure the suffix "a" is before "aba"
+ // when using a potentially unstable sort.
+ lastByte := data[len(data)-1]
+ s := int64(-1)
+ for i := range sa {
+ if sa[i] >= 0 {
+ if data[sa[i]] == lastByte && s == -1 {
+ s = int64(i)
+ }
+ if sa[i] == int64(len(sa))-1 {
+ sa[i], sa[s] = sa[s], sa[i]
+ inv[sa[s]] = s
+ sa[s] = -1 // mark it as an isolated sorted group
+ break
+ }
+ }
+ }
+ return inv
+}
+
+type suffixSortable64 struct {
+ sa []int64
+ inv []int64
+ h int64
+ buf []int64 // common scratch space
+}
+
+func (x *suffixSortable64) Len() int { return len(x.sa) }
+func (x *suffixSortable64) Less(i, j int) bool { return x.inv[x.sa[i]+x.h] < x.inv[x.sa[j]+x.h] }
+func (x *suffixSortable64) Swap(i, j int) { x.sa[i], x.sa[j] = x.sa[j], x.sa[i] }
+
+func (x *suffixSortable64) updateGroups(offset int64) {
+ bounds := x.buf[0:0]
+ group := x.inv[x.sa[0]+x.h]
+ for i := 1; i < len(x.sa); i++ {
+ if g := x.inv[x.sa[i]+x.h]; g > group {
+ bounds = append(bounds, int64(i))
+ group = g
+ }
+ }
+ bounds = append(bounds, int64(len(x.sa)))
+ x.buf = bounds
+
+ // update the group numberings after all new groups are determined
+ prev := int64(0)
+ for _, b := range bounds {
+ for i := prev; i < b; i++ {
+ x.inv[x.sa[i]] = offset + b - 1
+ }
+ if b-prev == 1 {
+ x.sa[prev] = -1
+ }
+ prev = b
+ }
+}
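
Once the outer loop ends, every group is sorted and the final loop inverts inv back into sa, so sa[i] is the start offset of the i'th-smallest suffix. A hedged sketch of calling the package-internal sorter (from inside the package; the 32-bit variant behaves identically):

	sa := qsufsort64([]byte("banana"))
	// sorted suffixes: "a"(5), "ana"(3), "anana"(1), "banana"(0), "na"(4), "nana"(2)
	fmt.Println(sa) // [5 3 1 0 4 2]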
diff --git a/src/index/suffixarray/suffixarray.go b/src/index/suffixarray/suffixarray.go
index 0961ac4fb2..339643db4d 100644
--- a/src/index/suffixarray/suffixarray.go
+++ b/src/index/suffixarray/suffixarray.go
@@ -19,21 +19,68 @@ package suffixarray
import (
"bytes"
"encoding/binary"
+ "errors"
"io"
+ "math"
"regexp"
"sort"
)
+// Can change for testing
+var maxData32 int = realMaxData32
+
+const realMaxData32 = math.MaxInt32
+
// Index implements a suffix array for fast substring search.
type Index struct {
data []byte
- sa []int // suffix array for data; len(sa) == len(data)
+ sa ints // suffix array for data; sa.len() == len(data)
+}
+
+// An ints is either an []int32 or an []int64.
+// That is, one of them is empty, and one is the real data.
+// The int64 form is used when len(data) > maxData32
+type ints struct {
+ int32 []int32
+ int64 []int64
+}
+
+func (a *ints) len() int {
+ return len(a.int32) + len(a.int64)
+}
+
+func (a *ints) get(i int) int64 {
+ if a.int32 != nil {
+ return int64(a.int32[i])
+ }
+ return a.int64[i]
+}
+
+func (a *ints) set(i int, v int64) {
+ if a.int32 != nil {
+ a.int32[i] = int32(v)
+ } else {
+ a.int64[i] = v
+ }
+}
+
+func (a *ints) slice(i, j int) ints {
+ if a.int32 != nil {
+ return ints{a.int32[i:j], nil}
+ }
+ return ints{nil, a.int64[i:j]}
}
// New creates a new Index for data.
// Index creation time is O(N*log(N)) for N = len(data).
func New(data []byte) *Index {
- return &Index{data, qsufsort(data)}
+ ix := &Index{data: data}
+ if len(data) <= maxData32 {
+ ix.sa.int32 = qsufsort32(data)
+ } else {
+ ix.sa.int64 = qsufsort64(data)
+ }
+ return ix
}
// writeInt writes an int x to w using buf to buffer the write.
@@ -44,19 +91,20 @@ func writeInt(w io.Writer, buf []byte, x int) error {
}
// readInt reads an int x from r using buf to buffer the read and returns x.
-func readInt(r io.Reader, buf []byte) (int, error) {
+func readInt(r io.Reader, buf []byte) (int64, error) {
_, err := io.ReadFull(r, buf[0:binary.MaxVarintLen64]) // ok to continue with error
x, _ := binary.Varint(buf)
- return int(x), err
+ return x, err
}
// writeSlice writes data[:n] to w and returns n.
// It uses buf to buffer the write.
-func writeSlice(w io.Writer, buf []byte, data []int) (n int, err error) {
+func writeSlice(w io.Writer, buf []byte, data ints) (n int, err error) {
// encode as many elements as fit into buf
p := binary.MaxVarintLen64
- for ; n < len(data) && p+binary.MaxVarintLen64 <= len(buf); n++ {
- p += binary.PutUvarint(buf[p:], uint64(data[n]))
+ m := data.len()
+ for ; n < m && p+binary.MaxVarintLen64 <= len(buf); n++ {
+ p += binary.PutUvarint(buf[p:], uint64(data.get(n)))
}
// update buffer size
@@ -67,15 +115,22 @@ func writeSlice(w io.Writer, buf []byte, data []int) (n int, err error) {
return
}
+var errTooBig = errors.New("suffixarray: data too large")
+
// readSlice reads data[:n] from r and returns n.
// It uses buf to buffer the read.
-func readSlice(r io.Reader, buf []byte, data []int) (n int, err error) {
+func readSlice(r io.Reader, buf []byte, data ints) (n int, err error) {
// read buffer size
- var size int
- size, err = readInt(r, buf)
+ var size64 int64
+ size64, err = readInt(r, buf)
if err != nil {
return
}
+ if int64(int(size64)) != size64 || int(size64) < 0 {
+ // We never write chunks this big anyway.
+ return 0, errTooBig
+ }
+ size := int(size64)
// read buffer w/o the size
if _, err = io.ReadFull(r, buf[binary.MaxVarintLen64:size]); err != nil {
@@ -85,7 +140,7 @@ func readSlice(r io.Reader, buf []byte, data []int) (n int, err error) {
// decode as many elements as present in buf
for p := binary.MaxVarintLen64; p < size; n++ {
x, w := binary.Uvarint(buf[p:])
- data[n] = int(x)
+ data.set(n, int64(x))
p += w
}
@@ -100,21 +155,31 @@ func (x *Index) Read(r io.Reader) error {
buf := make([]byte, bufSize)
// read length
- n, err := readInt(r, buf)
+ n64, err := readInt(r, buf)
if err != nil {
return err
}
+ if int64(int(n64)) != n64 || int(n64) < 0 {
+ return errTooBig
+ }
+ n := int(n64)
// allocate space
- if 2*n < cap(x.data) || cap(x.data) < n {
+ if 2*n < cap(x.data) || cap(x.data) < n || x.sa.int32 != nil && n > maxData32 || x.sa.int64 != nil && n <= maxData32 {
// new data is significantly smaller or larger than
// existing buffers - allocate new ones
x.data = make([]byte, n)
- x.sa = make([]int, n)
+ x.sa.int32 = nil
+ x.sa.int64 = nil
+ if n <= maxData32 {
+ x.sa.int32 = make([]int32, n)
+ } else {
+ x.sa.int64 = make([]int64, n)
+ }
} else {
// re-use existing buffers
x.data = x.data[0:n]
- x.sa = x.sa[0:n]
+ x.sa = x.sa.slice(0, n)
}
// read data
@@ -123,12 +188,13 @@ func (x *Index) Read(r io.Reader) error {
}
// read index
- for sa := x.sa; len(sa) > 0; {
+ sa := x.sa
+ for sa.len() > 0 {
n, err := readSlice(r, buf, sa)
if err != nil {
return err
}
- sa = sa[n:]
+ sa = sa.slice(n, sa.len())
}
return nil
}
@@ -149,12 +215,13 @@ func (x *Index) Write(w io.Writer) error {
}
// write index
- for sa := x.sa; len(sa) > 0; {
+ sa := x.sa
+ for sa.len() > 0 {
n, err := writeSlice(w, buf, sa)
if err != nil {
return err
}
- sa = sa[n:]
+ sa = sa.slice(n, sa.len())
}
return nil
}
@@ -167,18 +234,18 @@ func (x *Index) Bytes() []byte {
}
func (x *Index) at(i int) []byte {
- return x.data[x.sa[i]:]
+ return x.data[x.sa.get(i):]
}
// lookupAll returns a slice into the matching region of the index.
// The runtime is O(log(N)*len(s)).
-func (x *Index) lookupAll(s []byte) []int {
+func (x *Index) lookupAll(s []byte) ints {
// find matching suffix index range [i:j]
// find the first index where s would be the prefix
- i := sort.Search(len(x.sa), func(i int) bool { return bytes.Compare(x.at(i), s) >= 0 })
+ i := sort.Search(x.sa.len(), func(i int) bool { return bytes.Compare(x.at(i), s) >= 0 })
// starting at i, find the first index at which s is not a prefix
- j := i + sort.Search(len(x.sa)-i, func(j int) bool { return !bytes.HasPrefix(x.at(j+i), s) })
- return x.sa[i:j]
+ j := i + sort.Search(x.sa.len()-i, func(j int) bool { return !bytes.HasPrefix(x.at(j+i), s) })
+ return x.sa.slice(i, j)
}
// Lookup returns an unsorted list of at most n indices where the byte string s
@@ -190,13 +257,22 @@ func (x *Index) lookupAll(s []byte) []int {
func (x *Index) Lookup(s []byte, n int) (result []int) {
if len(s) > 0 && n != 0 {
matches := x.lookupAll(s)
- if n < 0 || len(matches) < n {
- n = len(matches)
+ count := matches.len()
+ if n < 0 || count < n {
+ n = count
}
- // 0 <= n <= len(matches)
+ // 0 <= n <= count
if n > 0 {
result = make([]int, n)
- copy(result, matches)
+ if matches.int32 != nil {
+ for i := range result {
+ result[i] = int(matches.int32[i])
+ }
+ } else {
+ for i := range result {
+ result[i] = int(matches.int64[i])
+ }
+ }
}
}
return
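
The int32/int64 split stays invisible at the exported API: Lookup copies whichever backing slice is live into a plain []int, so callers are unaffected. For example:

	x := suffixarray.New([]byte("banana"))
	offsets := x.Lookup([]byte("ana"), -1) // all matches, unsorted; contains 1 and 3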
diff --git a/src/index/suffixarray/suffixarray_test.go b/src/index/suffixarray/suffixarray_test.go
index 644f00c757..3cb8450105 100644
--- a/src/index/suffixarray/suffixarray_test.go
+++ b/src/index/suffixarray/suffixarray_test.go
@@ -6,7 +6,11 @@ package suffixarray
import (
"bytes"
+ "fmt"
+ "io/ioutil"
"math/rand"
+ "os"
+ "path/filepath"
"regexp"
"sort"
"strings"
@@ -207,10 +211,19 @@ func testLookups(t *testing.T, tc *testCase, x *Index, n int) {
// index is used to hide the sort.Interface
type index Index
-func (x *index) Len() int { return len(x.sa) }
+func (x *index) Len() int { return x.sa.len() }
func (x *index) Less(i, j int) bool { return bytes.Compare(x.at(i), x.at(j)) < 0 }
-func (x *index) Swap(i, j int) { x.sa[i], x.sa[j] = x.sa[j], x.sa[i] }
-func (a *index) at(i int) []byte { return a.data[a.sa[i]:] }
+func (x *index) Swap(i, j int) {
+ if x.sa.int32 != nil {
+ x.sa.int32[i], x.sa.int32[j] = x.sa.int32[j], x.sa.int32[i]
+ } else {
+ x.sa.int64[i], x.sa.int64[j] = x.sa.int64[j], x.sa.int64[i]
+ }
+}
+
+func (x *index) at(i int) []byte {
+ return x.data[x.sa.get(i):]
+}
func testConstruction(t *testing.T, tc *testCase, x *Index) {
if !sort.IsSorted((*index)(x)) {
@@ -222,8 +235,12 @@ func equal(x, y *Index) bool {
if !bytes.Equal(x.data, y.data) {
return false
}
- for i, j := range x.sa {
- if j != y.sa[i] {
+ if x.sa.len() != y.sa.len() {
+ return false
+ }
+ n := x.sa.len()
+ for i := 0; i < n; i++ {
+ if x.sa.get(i) != y.sa.get(i) {
return false
}
}
@@ -238,16 +255,41 @@ func testSaveRestore(t *testing.T, tc *testCase, x *Index) int {
}
size := buf.Len()
var y Index
- if err := y.Read(&buf); err != nil {
+ if err := y.Read(bytes.NewReader(buf.Bytes())); err != nil {
+ t.Errorf("failed reading index %s (%s)", tc.name, err)
+ }
+ if !equal(x, &y) {
+ t.Errorf("restored index doesn't match saved index %s", tc.name)
+ }
+
+ old := maxData32
+ defer func() {
+ maxData32 = old
+ }()
+ // Reread as forced 32.
+ y = Index{}
+ maxData32 = realMaxData32
+ if err := y.Read(bytes.NewReader(buf.Bytes())); err != nil {
+ t.Errorf("failed reading index %s (%s)", tc.name, err)
+ }
+ if !equal(x, &y) {
+ t.Errorf("restored index doesn't match saved index %s", tc.name)
+ }
+
+ // Reread as forced 64.
+ y = Index{}
+ maxData32 = -1
+ if err := y.Read(bytes.NewReader(buf.Bytes())); err != nil {
t.Errorf("failed reading index %s (%s)", tc.name, err)
}
if !equal(x, &y) {
t.Errorf("restored index doesn't match saved index %s", tc.name)
}
+
return size
}
-func TestIndex(t *testing.T) {
+func testIndex(t *testing.T) {
for _, tc := range testCases {
x := New([]byte(tc.source))
testConstruction(t, &tc, x)
@@ -260,45 +302,162 @@ func TestIndex(t *testing.T) {
}
}
+func TestIndex32(t *testing.T) {
+ testIndex(t)
+}
+
+func TestIndex64(t *testing.T) {
+ maxData32 = -1
+ defer func() {
+ maxData32 = realMaxData32
+ }()
+ testIndex(t)
+}
+
+var (
+ benchdata = make([]byte, 1e6)
+ benchrand = make([]byte, 1e6)
+)
+
// Of all possible inputs, the random bytes have the least amount of substring
// repetition, and the repeated bytes have the most. For most algorithms,
// the running time of every input will be between these two.
func benchmarkNew(b *testing.B, random bool) {
+ b.ReportAllocs()
b.StopTimer()
- data := make([]byte, 1e6)
+ data := benchdata
if random {
- for i := range data {
- data[i] = byte(rand.Intn(256))
+ data = benchrand
+ if data[0] == 0 {
+ for i := range data {
+ data[i] = byte(rand.Intn(256))
+ }
}
}
b.StartTimer()
+ b.SetBytes(int64(len(data)))
for i := 0; i < b.N; i++ {
New(data)
}
}
-func BenchmarkNewIndexRandom(b *testing.B) {
- benchmarkNew(b, true)
+func makeText(name string) ([]byte, error) {
+ var data []byte
+ switch name {
+ case "opticks":
+ var err error
+ data, err = ioutil.ReadFile("../../testdata/Isaac.Newton-Opticks.txt")
+ if err != nil {
+ return nil, err
+ }
+ case "go":
+ err := filepath.Walk("../..", func(path string, info os.FileInfo, err error) error {
+ if err == nil && strings.HasSuffix(path, ".go") && !info.IsDir() {
+ file, err := ioutil.ReadFile(path)
+ if err != nil {
+ return err
+ }
+ data = append(data, file...)
+ }
+ return nil
+ })
+ if err != nil {
+ return nil, err
+ }
+ case "zero":
+ data = make([]byte, 50e6)
+ case "rand":
+ data = make([]byte, 50e6)
+ for i := range data {
+ data[i] = byte(rand.Intn(256))
+ }
+ }
+ return data, nil
}
-func BenchmarkNewIndexRepeat(b *testing.B) {
- benchmarkNew(b, false)
+
+func setBits(bits int) (cleanup func()) {
+ if bits == 32 {
+ maxData32 = realMaxData32
+ } else {
+ maxData32 = -1 // force use of 64-bit code
+ }
+ return func() {
+ maxData32 = realMaxData32
+ }
+}
+
+func BenchmarkNew(b *testing.B) {
+ for _, text := range []string{"opticks", "go", "zero", "rand"} {
+ b.Run("text="+text, func(b *testing.B) {
+ data, err := makeText(text)
+ if err != nil {
+ b.Fatal(err)
+ }
+ if testing.Short() && len(data) > 5e6 {
+ data = data[:5e6]
+ }
+ for _, size := range []int{100e3, 500e3, 1e6, 5e6, 10e6, 50e6} {
+ if len(data) < size {
+ continue
+ }
+ data := data[:size]
+ name := fmt.Sprintf("%dK", size/1e3)
+ if size >= 1e6 {
+ name = fmt.Sprintf("%dM", size/1e6)
+ }
+ b.Run("size="+name, func(b *testing.B) {
+ for _, bits := range []int{32, 64} {
+ if ^uint(0) == 0xffffffff && bits == 64 {
+ continue
+ }
+ b.Run(fmt.Sprintf("bits=%d", bits), func(b *testing.B) {
+ cleanup := setBits(bits)
+ defer cleanup()
+
+ b.SetBytes(int64(len(data)))
+ b.ReportAllocs()
+ for i := 0; i < b.N; i++ {
+ New(data)
+ }
+ })
+ }
+ })
+ }
+ })
+ }
}
func BenchmarkSaveRestore(b *testing.B) {
- b.StopTimer()
r := rand.New(rand.NewSource(0x5a77a1)) // guarantee always same sequence
data := make([]byte, 1<<20) // 1MB of data to index
for i := range data {
data[i] = byte(r.Intn(256))
}
- x := New(data)
- size := testSaveRestore(nil, nil, x) // verify correctness
- buf := bytes.NewBuffer(make([]byte, size)) // avoid growing
- b.SetBytes(int64(size))
- b.StartTimer()
- for i := 0; i < b.N; i++ {
- x.Write(buf)
- var y Index
- y.Read(buf)
+ for _, bits := range []int{32, 64} {
+ if ^uint(0) == 0xffffffff && bits == 64 {
+ continue
+ }
+ b.Run(fmt.Sprintf("bits=%d", bits), func(b *testing.B) {
+ cleanup := setBits(bits)
+ defer cleanup()
+
+ b.StopTimer()
+ x := New(data)
+ size := testSaveRestore(nil, nil, x) // verify correctness
+ buf := bytes.NewBuffer(make([]byte, size)) // avoid growing
+ b.SetBytes(int64(size))
+ b.StartTimer()
+ b.ReportAllocs()
+ for i := 0; i < b.N; i++ {
+ buf.Reset()
+ if err := x.Write(buf); err != nil {
+ b.Fatal(err)
+ }
+ var y Index
+ if err := y.Read(buf); err != nil {
+ b.Fatal(err)
+ }
+ }
+ })
}
}
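
For completeness, a round trip through the exported serialization API: Write records the element count up front, so Read can choose the 32- or 64-bit representation on its own. A short sketch, where data stands for any []byte:

	var buf bytes.Buffer
	x := suffixarray.New(data)
	if err := x.Write(&buf); err != nil {
		log.Fatal(err)
	}
	var y suffixarray.Index
	if err := y.Read(&buf); err != nil {
		log.Fatal(err)
	}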