From 6916c5f66d59123e4578ddeedc39adaece6f1c54 Mon Sep 17 00:00:00 2001 From: c-bata Date: Sun, 9 Dec 2018 16:35:08 +0900 Subject: [PATCH] Move some strings utilities to internal/strings --- document.go | 96 ++------------------------- internal/strings/strings.go | 110 +++++++++++++++++++++++++++++++ internal/strings/strings_test.go | 47 +++++++++++++ 3 files changed, 162 insertions(+), 91 deletions(-) create mode 100644 internal/strings/strings.go create mode 100644 internal/strings/strings_test.go diff --git a/document.go b/document.go index a3aaee1..54b7a37 100644 --- a/document.go +++ b/document.go @@ -5,6 +5,7 @@ import ( "unicode/utf8" "github.com/c-bata/go-prompt/internal/bisect" + istrings "github.com/c-bata/go-prompt/internal/strings" runewidth "github.com/mattn/go-runewidth" ) @@ -133,7 +134,7 @@ func (d *Document) FindStartOfPreviousWord() int { // The only difference is to ignore contiguous spaces. func (d *Document) FindStartOfPreviousWordWithSpace() int { x := d.TextBeforeCursor() - end := lastIndexByteNot(x, ' ') + end := istrings.LastIndexNotByte(x, ' ') if end == -1 { return 0 } @@ -168,7 +169,7 @@ func (d *Document) FindStartOfPreviousWordUntilSeparatorIgnoreNextToCursor(sep s } x := d.TextBeforeCursor() - end := lastIndexAnyNot(x, sep) + end := istrings.LastIndexNotAny(x, sep) if end == -1 { return 0 } @@ -195,7 +196,7 @@ func (d *Document) FindEndOfCurrentWord() int { func (d *Document) FindEndOfCurrentWordWithSpace() int { x := d.TextAfterCursor() - start := indexByteNot(x, ' ') + start := istrings.IndexNotByte(x, ' ') if start == -1 { return len(x) } @@ -232,7 +233,7 @@ func (d *Document) FindEndOfCurrentWordUntilSeparatorIgnoreNextToCursor(sep stri x := d.TextAfterCursor() - start := indexAnyNot(x, sep) + start := istrings.IndexNotAny(x, sep) if start == -1 { return len(x) } @@ -432,90 +433,3 @@ func (d *Document) leadingWhitespaceInCurrentLine() (margin string) { margin = d.CurrentLine()[:len(d.CurrentLine())-len(trimmed)] return } - -func 
indexByteNot(s string, c byte) int { - n := len(s) - for i := 0; i < n; i++ { - if s[i] != c { - return i - } - } - return -1 -} - -func lastIndexByteNot(s string, c byte) int { - for i := len(s) - 1; i >= 0; i-- { - if s[i] != c { - return i - } - } - return -1 -} - -type asciiSet [8]uint32 - -func (as *asciiSet) notContains(c byte) bool { - return (as[c>>5] & (1 << uint(c&31))) == 0 -} - -func makeASCIISet(chars string) (as asciiSet, ok bool) { - for i := 0; i < len(chars); i++ { - c := chars[i] - if c >= utf8.RuneSelf { - return as, false - } - as[c>>5] |= 1 << uint(c&31) - } - return as, true -} - -func indexAnyNot(s, chars string) int { - if len(chars) > 0 { - if len(s) > 8 { - if as, isASCII := makeASCIISet(chars); isASCII { - for i := 0; i < len(s); i++ { - if as.notContains(s[i]) { - return i - } - } - return -1 - } - } - for i := 0; i < len(s); { - // I don't know why strings.IndexAny doesn't add rune count here. - r, size := utf8.DecodeRuneInString(s[i:]) - i += size - for _, c := range chars { - if r != c { - return i - } - } - } - } - return -1 -} - -func lastIndexAnyNot(s, chars string) int { - if len(chars) > 0 { - if len(s) > 8 { - if as, isASCII := makeASCIISet(chars); isASCII { - for i := len(s) - 1; i >= 0; i-- { - if as.notContains(s[i]) { - return i - } - } - return -1 - } - } - for i := len(s); i > 0; { - r, size := utf8.DecodeLastRuneInString(s[:i]) - i -= size - for _, c := range chars { - if r != c { - return i - } - } - } - } - return -1 -} diff --git a/internal/strings/strings.go b/internal/strings/strings.go new file mode 100644 index 0000000..c537876 --- /dev/null +++ b/internal/strings/strings.go @@ -0,0 +1,110 @@ +package strings + +import "unicode/utf8" + +// IndexNotByte is similar with strings.IndexByte but returns +// the index of the first instance of character except c in s. +// or -1 if s only contains c. 
+func IndexNotByte(s string, c byte) int {
+	for i := 0; i < len(s); i++ {
+		if s[i] != c {
+			return i
+		}
+	}
+	return -1
+}
+
+// LastIndexNotByte is similar to strings.LastIndexByte but returns
+// the index of the last byte in s that is not c,
+// or -1 if s is empty or only contains c.
+func LastIndexNotByte(s string, c byte) int {
+	for i := len(s) - 1; i >= 0; i-- {
+		if s[i] != c {
+			return i
+		}
+	}
+	return -1
+}
+
+// asciiSet is a 256-bit bitmap with one bit per byte value; a set bit marks membership.
+type asciiSet [8]uint32
+
+// notContains reports whether c is absent from the set.
+func (as *asciiSet) notContains(c byte) bool {
+	return (as[c>>5] & (1 << uint(c&31))) == 0
+}
+
+// makeASCIISet builds the set of the bytes in chars. ok is false when chars
+// contains a byte >= utf8.RuneSelf, in which case the returned set is incomplete.
+func makeASCIISet(chars string) (as asciiSet, ok bool) {
+	for i := 0; i < len(chars); i++ {
+		c := chars[i]
+		if c >= utf8.RuneSelf {
+			return as, false
+		}
+		as[c>>5] |= 1 << uint(c&31)
+	}
+	return as, true
+}
+
+// IndexNotAny is the complement of strings.IndexAny: it returns the byte
+// index of the first Unicode code point in s that is not in chars, or -1
+// if s has no such code point or chars is empty.
+func IndexNotAny(s, chars string) int {
+	if len(chars) > 0 {
+		if len(s) > 8 {
+			if as, isASCII := makeASCIISet(chars); isASCII {
+				for i := 0; i < len(s); i++ {
+					if as.notContains(s[i]) {
+						return i
+					}
+				}
+				return -1
+			}
+		}
+	NextRune:
+		for i, c := range s {
+			// c qualifies only if it equals no rune of chars (multi-byte runes included).
+			for _, m := range chars {
+				if c == m {
+					continue NextRune
+				}
+			}
+			return i
+		}
+	}
+	return -1
+}
+
+// LastIndexNotAny is the complement of strings.LastIndexAny: it returns the
+// index of the last Unicode code point in s that is not in chars, or -1 if
+// s has no such code point or chars is empty.
+func LastIndexNotAny(s, chars string) int {
+	if len(chars) > 0 {
+		if len(s) > 8 {
+			// ASCII-only chars: scan bytes; a trailing multi-byte rune is reported at its last byte.
+			if as, isASCII := makeASCIISet(chars); isASCII {
+				for i := len(s) - 1; i >= 0; i-- {
+					if as.notContains(s[i]) {
+						return i
+					}
+				}
+				return -1
+			}
+		}
+	NextRune:
+		for i := len(s); i > 0; {
+			r, size := utf8.DecodeLastRuneInString(s[:i])
+			i -= size
+			// r qualifies only if it equals no rune of chars; comparing rune
+			// by rune keeps this correct when chars holds multi-byte runes.
+			for _, m := range chars {
+				if r == m {
+					continue NextRune
+				}
+			}
+			return i
+		}
+	}
+	return -1
+}
diff --git a/internal/strings/strings_test.go b/internal/strings/strings_test.go
new file mode 100644
index 0000000..d9d9bc9
--- /dev/null
+++ b/internal/strings/strings_test.go
@@ -0,0 +1,47 @@
+package strings_test
+
+import (
+	"fmt"
+
+	"github.com/c-bata/go-prompt/internal/strings"
+)
+
+func ExampleIndexNotByte() {
+	fmt.Println(strings.IndexNotByte("golang", 'g'))
+	fmt.Println(strings.IndexNotByte("golang", 'x'))
+	fmt.Println(strings.IndexNotByte("gggggg", 'g'))
+	// Output:
+	// 1
+	// 0
+	// -1
+}
+
+func ExampleLastIndexNotByte() {
+	fmt.Println(strings.LastIndexNotByte("golang", 'g'))
+	fmt.Println(strings.LastIndexNotByte("golang", 'x'))
+	fmt.Println(strings.LastIndexNotByte("gggggg", 'g'))
+	// Output:
+	// 4
+	// 5
+	// -1
+}
+
+func ExampleIndexNotAny() {
+	fmt.Println(strings.IndexNotAny("golang", "glo"))
+	fmt.Println(strings.IndexNotAny("golang", "gl"))
+	fmt.Println(strings.IndexNotAny("golang", "golang"))
+	// Output:
+	// 3
+	// 1
+	// -1
+}
+
+func ExampleLastIndexNotAny() {
+	fmt.Println(strings.LastIndexNotAny("golang", "agn"))
+	fmt.Println(strings.LastIndexNotAny("golang", "an"))
+	fmt.Println(strings.LastIndexNotAny("golang", "golang"))
+	// Output:
+	// 2
+	// 5
+	// -1
+}