diff options
-rw-r--r-- | util/text/left_padded.go | 19 | ||||
-rw-r--r-- | util/text/text.go | 306 | ||||
-rw-r--r-- | util/text/text_test.go | 109 |
3 files changed, 342 insertions, 92 deletions
diff --git a/util/text/left_padded.go b/util/text/left_padded.go index 729834db8..eae65d345 100644 --- a/util/text/left_padded.go +++ b/util/text/left_padded.go @@ -3,25 +3,26 @@ package text import ( "bytes" "fmt" + "github.com/mattn/go-runewidth" "strings" ) -// LeftPadMaxLine pads a string on the left by a specified amount and pads the string on the right to fill the maxLength +// LeftPadMaxLine pads a string on the left by a specified amount and pads the +// string on the right to fill the maxLength func LeftPadMaxLine(text string, length, leftPad int) string { - runes := []rune(text) + var rightPart string = text + scrWidth := runewidth.StringWidth(text) // truncate and ellipse if needed - if len(runes)+leftPad > length { - runes = append(runes[:(length-leftPad-1)], '…') - } - - if len(runes)+leftPad < length { - runes = append(runes, []rune(strings.Repeat(" ", length-len(runes)-leftPad))...) + if scrWidth+leftPad > length { + rightPart = runewidth.Truncate(text, length-leftPad, "…") + } else if scrWidth+leftPad < length { + rightPart = runewidth.FillRight(text, length-leftPad) } return fmt.Sprintf("%s%s", strings.Repeat(" ", leftPad), - string(runes), + rightPart, ) } diff --git a/util/text/text.go b/util/text/text.go index cffb4ee21..81cc870b6 100644 --- a/util/text/text.go +++ b/util/text/text.go @@ -1,12 +1,18 @@ package text import ( - "bytes" - "strings" - "github.com/mattn/go-runewidth" + "strings" + "unicode/utf8" ) +// Force runewidth not to treat ambiguous runes as wide chars, so that things +// like unicode ellipsis/up/down/left/right glyphs can have correct runewidth +// and can be displayed correctly in terminals. 
+func init() { + runewidth.DefaultCondition.EastAsianWidth = false +} + // Wrap a text for an exact line size // Handle properly terminal color escape code func Wrap(text string, lineWidth int) (string, int) { @@ -16,98 +22,248 @@ func Wrap(text string, lineWidth int) (string, int) { // Wrap a text for an exact line size with a left padding // Handle properly terminal color escape code func WrapLeftPadded(text string, lineWidth int, leftPad int) (string, int) { - var textBuffer bytes.Buffer - var lineBuffer bytes.Buffer - nbLine := 1 - firstLine := true + var lines []string + nbLine := 0 pad := strings.Repeat(" ", leftPad) // tabs are formatted as 4 spaces - text = strings.Replace(text, "\t", " ", 4) - + text = strings.Replace(text, "\t", " ", -1) + // NOTE: text is first segmented into lines so that softwrapLine can handle. for _, line := range strings.Split(text, "\n") { - spaceLeft := lineWidth - leftPad - - if !firstLine { - textBuffer.WriteString("\n") + if line == "" || strings.TrimSpace(line) == "" { + lines = append(lines, "") nbLine++ + } else { + wrapped := softwrapLine(line, lineWidth-leftPad) + firstLine := true + for _, seg := range strings.Split(wrapped, "\n") { + if firstLine { + lines = append(lines, pad+strings.TrimRight(seg, " ")) + firstLine = false + } else { + lines = append(lines, pad+strings.TrimSpace(seg)) + } + nbLine++ + } } + } + return strings.Join(lines, "\n"), nbLine +} - firstWord := true +// Break a line into several lines so that each line consumes at most +// 'textWidth' cells. Lines break at groups of white spaces and multibyte +// chars. Nothing is removed from the original text so that it behaves like a +// softwrap. +// +// Required: The line shall not contain '\n' +// +// WRAPPING ALGORITHM: The line is broken into non-breakable chunks, then line +// breaks ("\n") are inserted between these groups so that the total length +// between breaks does not exceed the required width. 
Words that are longer than +// the textWidth are broken into pieces no longer than textWidth. +// +func softwrapLine(line string, textWidth int) string { + // NOTE: terminal escapes are stripped out of the line so the algorithm is + // simpler. Do not try to mix them in the wrapping algorithm, as it can get + // complicated quickly. + line1, termEscapes := extractTermEscapes(line) + + chunks := segmentLine(line1) + // Reverse the chunk array so we can use it as a stack. + for i, j := 0, len(chunks)-1; i < j; i, j = i+1, j-1 { + chunks[i], chunks[j] = chunks[j], chunks[i] + } + var line2 string = "" + var width int = 0 + for len(chunks) > 0 { + thisWord := chunks[len(chunks)-1] + wl := wordLen(thisWord) + if width+wl <= textWidth { + line2 += chunks[len(chunks)-1] + chunks = chunks[:len(chunks)-1] + width += wl + if width == textWidth && len(chunks) > 0 { + // NOTE: new line begins when current line is full and there are more + // chunks to come. + line2 += "\n" + width = 0 + } + } else if wl > textWidth { + // NOTE: By default, long words are split to fill the remaining space. + // But if the long word is the first non-space word in the middle of the + // line, preceding spaces shall not be counted in word splitting. + splitWidth := textWidth - width + if strings.HasSuffix(line2, "\n"+strings.Repeat(" ", width)) { + splitWidth += width + } + left, right := splitWord(chunks[len(chunks)-1], splitWidth) + chunks[len(chunks)-1] = right + line2 += left + "\n" + width = 0 + } else { + line2 += "\n" + width = 0 + } + } - for _, word := range strings.Split(line, " ") { - wordLength := wordLen(word) + line3 := applyTermEscapes(line2, termEscapes) + return line3 +} - if !firstWord { - lineBuffer.WriteString(" ") - spaceLeft -= 1 +// EscapeItem: Storage of terminal escapes in a line. 'item' is the actual +// escape command, and 'pos' is the index in the rune array where the 'item' +// shall be inserted back. 
For example, the escape item in "F\x1b33mox" is +// {"\x1b33m", 1}. +type escapeItem struct { + item string + pos int +} - if spaceLeft <= 0 { - textBuffer.WriteString(pad + strings.TrimRight(lineBuffer.String(), " ")) - textBuffer.WriteString("\n") - lineBuffer.Reset() - spaceLeft = lineWidth - leftPad - nbLine++ - firstLine = false - } +// Extract terminal escapes out of a line, returns a new line without terminal +// escapes and a slice of escape items. The terminal escapes can be inserted +// back into the new line at rune index 'item.pos' to recover the original line. +// +// Required: The line shall not contain "\n" +// +func extractTermEscapes(line string) (string, []escapeItem) { + var termEscapes []escapeItem + var line1 string + + pos := 0 + item := "" + occupiedRuneCount := 0 + inEscape := false + for i, r := range []rune(line) { + if r == '\x1b' { + pos = i + item = string(r) + inEscape = true + continue + } + if inEscape { + item += string(r) + if r == 'm' { + termEscapes = append(termEscapes, escapeItem{item, pos - occupiedRuneCount}) + occupiedRuneCount += utf8.RuneCountInString(item) + inEscape = false } + continue + } + line1 += string(r) + } + + return line1, termEscapes +} - // Word fit in the current line - if spaceLeft >= wordLength { - lineBuffer.WriteString(word) - spaceLeft -= wordLength - firstWord = false +// Apply the extracted terminal escapes to the edited line. The only edit +// allowed is to insert "\n" like that in softwrapLine. Callers shall ensure +// this since this function is not able to check it. +func applyTermEscapes(line string, escapes []escapeItem) string { + if len(escapes) == 0 { + return line + } + + var out string = "" + + currPos := 0 + currItem := 0 + for _, r := range line { + if currItem < len(escapes) && currPos == escapes[currItem].pos { + // NOTE: We avoid terminal escapes at the end of a line by moving them one + // past the end of line, so that algorithms who trim right spaces are + // happy. 
But algorithms who trim left spaces are still unhappy. + if r == '\n' { + out += "\n" + escapes[currItem].item } else { - // Break a word longer than a line - if wordLength > lineWidth { - for wordLength > 0 && wordLen(word) > 0 { - l := minInt(spaceLeft, wordLength) - part, leftover := splitWord(word, l) - word = leftover - wordLength = wordLen(word) - - lineBuffer.WriteString(part) - textBuffer.WriteString(pad) - textBuffer.Write(lineBuffer.Bytes()) - lineBuffer.Reset() - - spaceLeft -= l - - if spaceLeft <= 0 { - textBuffer.WriteString("\n") - nbLine++ - spaceLeft = lineWidth - leftPad - } - - if wordLength <= 0 { - break - } - } - } else { - // Normal break - textBuffer.WriteString(pad + strings.TrimRight(lineBuffer.String(), " ")) - textBuffer.WriteString("\n") - lineBuffer.Reset() - lineBuffer.WriteString(word) - firstWord = false - spaceLeft = lineWidth - leftPad - wordLength - nbLine++ - } + out += escapes[currItem].item + string(r) + currPos++ } + currItem++ + } else { + if r != '\n' { + currPos++ + } + out += string(r) } + } - if lineBuffer.Len() > 0 { - textBuffer.WriteString(pad + strings.TrimRight(lineBuffer.String(), " ")) - lineBuffer.Reset() - } + return out +} - firstLine = false +// Segment a line into chunks, where each chunk consists of chars with the same +// type and is not breakable. +func segmentLine(s string) []string { + var chunks []string + + var word string + wordType := none + flushWord := func() { + chunks = append(chunks, word) + word = "" + wordType = none } - return textBuffer.String(), nbLine + for _, r := range s { + // A WIDE_CHAR itself constitutes a chunk. + thisType := runeType(r) + if thisType == wideChar { + if wordType != none { + flushWord() + } + chunks = append(chunks, string(r)) + continue + } + // Other type of chunks starts with a char of that type, and ends with a + // char with different type or end of string. 
+ if thisType != wordType { + if wordType != none { + flushWord() + } + word = string(r) + wordType = thisType + } else { + word += string(r) + } + } + if word != "" { + flushWord() + } + + return chunks +} + +// Rune categories +// +// These categories are so defined that each category forms a non-breakable +// chunk. It IS NOT the same as unicode code point categories. +// +const ( + none int = iota + wideChar + invisible + shortUnicode + space + visibleAscii +) + +// Determine the category of a rune. +func runeType(r rune) int { + rw := runewidth.RuneWidth(r) + if rw > 1 { + return wideChar + } else if rw == 0 { + return invisible + } else if r > 127 { + return shortUnicode + } else if r == ' ' { + return space + } else { + return visibleAscii + } } -// wordLen return the length of a word, while ignoring the terminal escape sequences +// wordLen return the length of a word, while ignoring the terminal escape +// sequences func wordLen(word string) int { length := 0 escape := false @@ -116,11 +272,9 @@ func wordLen(word string) int { if char == '\x1b' { escape = true } - if !escape { length += runewidth.RuneWidth(rune(char)) } - if char == 'm' { escape = false } diff --git a/util/text/text_test.go b/util/text/text_test.go index f5b15a43a..9bf211645 100644 --- a/util/text/text_test.go +++ b/util/text/text_test.go @@ -1,6 +1,7 @@ package text import ( + "reflect" "strings" "testing" ) @@ -43,7 +44,7 @@ func TestWrap(t *testing.T) { // A tab counts as 4 characters. { "foo\nb\t r\n baz", - "foo\nb\n r\n baz", + "foo\nb\nr\n baz", 4, }, // Trailing whitespace is removed after used for wrapping. 
@@ -86,19 +87,31 @@ func TestWrap(t *testing.T) { // Complete example: { " This is a list: \n\n\t* foo\n\t* bar\n\n\n\t* baz \nBAM ", - " This\nis a\nlist:\n\n\n *\nfoo\n *\nbar\n\n\n *\nbaz\nBAM\n", + " This\nis a\nlist:\n\n *\nfoo\n *\nbar\n\n\n *\nbaz\nBAM\n", 6, }, // Handle chinese (wide characters) { - "婞一枳郲逴靲屮蜧曀殳,掫乇峔掮傎溒兀緉冘仜。", - "婞一枳郲逴靲\n屮蜧曀殳,掫\n乇峔掮傎溒兀\n緉冘仜。", + "一只敏捷的狐狸跳过了一只懒狗。", + "一只敏捷的狐\n狸跳过了一只\n懒狗。", 12, }, // Handle chinese with colors { - "婞一枳郲逴\x1b[31m靲屮蜧曀殳,掫乇峔掮傎溒\x1b[0m兀緉冘仜。", - "婞一枳郲逴\x1b[31m靲\n屮蜧曀殳,掫\n乇峔掮傎溒\x1b[0m兀\n緉冘仜。", + "一只敏捷的\x1b[31m狐狸跳过\x1b[0m了一只懒狗。", + "一只敏捷的\x1b[31m狐\n狸跳过\x1b[0m了一只\n懒狗。", + 12, + }, + // Handle mixed wide and short characters + { + "敏捷 A quick 的狐狸 fox 跳过 jumps over a lazy 了一只懒狗 dog。", + "敏捷 A quick\n的狐狸 fox\n跳过 jumps\nover a lazy\n了一只懒狗\ndog。", + 12, + }, + // Handle mixed wide and short characters with color + { + "敏捷 A \x1b31mquick 的狐狸 fox 跳\x1b0m过 jumps over a lazy 了一只懒狗 dog。", + "敏捷 A \x1b31mquick\n的狐狸 fox\n跳\x1b0m过 jumps\nover a lazy\n了一只懒狗\ndog。", 12, }, } @@ -106,7 +119,7 @@ func TestWrap(t *testing.T) { for i, tc := range cases { actual, lines := Wrap(tc.Input, tc.Lim) if actual != tc.Output { - t.Fatalf("Case %d Input:\n\n`%s`\n\nExpected Output:\n\n`%s`\n\nActual Output:\n`\n%s`", + t.Fatalf("Case %d Input:\n\n`%s`\n\nExpected Output:\n\n`%s`\n\nActual Output:\n\n`%s`", i, tc.Input, tc.Output, actual) } @@ -144,6 +157,14 @@ func TestWrapLeftPadded(t *testing.T) { 蚗佶庂咺丌,輀鈁乇彽洢溦洰氶乇构碨洐巿阹。`, 59, 4, }, + // Handle long unbreakable words in a full sentence + { + "OT: there are alternatives to maintainer-/user-set priority, e.g. \"[user pain](http://www.lostgarden.com/2008/05/improving-bug-triage-with-user-pain.html)\".", + ` OT: there are alternatives to maintainer-/user-set + priority, e.g. 
"[user pain](http://www.lostgarden.com/ + 2008/05/improving-bug-triage-with-user-pain.html)".`, + 58, 4, + }, } for i, tc := range cases { @@ -273,3 +294,77 @@ func TestSplitWord(t *testing.T) { } } } + +func TestExtractApplyTermEscapes(t *testing.T) { + cases := []struct { + Input string + Output string + TermEscapes []escapeItem + }{ + // A plain ascii line with escapes. + { + "This \x1b[31mis an\x1b[0m example.", + "This is an example.", + []escapeItem{{"\x1b[31m", 5}, {"\x1b[0m", 10}}, + }, + // A plain wide line with escapes. + { + "一只敏捷\x1b[31m的狐狸\x1b[0m跳过了一只懒狗。", + "一只敏捷的狐狸跳过了一只懒狗。", + []escapeItem{{"\x1b[31m", 4}, {"\x1b[0m", 7}}, + }, + // A normal-wide mixed line with escapes. + { + "一只 A Quick 敏捷\x1b[31m的狐 Fox 狸\x1b[0m跳过了Dog一只懒狗。", + "一只 A Quick 敏捷的狐 Fox 狸跳过了Dog一只懒狗。", + []escapeItem{{"\x1b[31m", 13}, {"\x1b[0m", 21}}, + }, + } + + for i, tc := range cases { + line2, escapes := extractTermEscapes(tc.Input) + if line2 != tc.Output || !reflect.DeepEqual(escapes, tc.TermEscapes) { + t.Fatalf("Case %d Input:\n\n`%s`\n\nExpected Output:\n\nLine: `%s`\nEscapes: `%+v`\n\nActual Output:\n\nLine: `%s`\nEscapes: `%+v`\n\n", + i, tc.Input, tc.Output, tc.TermEscapes, line2, escapes) + } + line3 := applyTermEscapes(line2, escapes) + if line3 != tc.Input { + t.Fatalf("Case %d Input:\n\n`%s`\n\nExpected Result:\n\n`%s`\n\nActual Result:\n\n`%s`\n\n", + i, tc.Input, tc.Input, line3) + } + } +} + +func TestSegmentLines(t *testing.T) { + cases := []struct { + Input string + Output []string + }{ + // A plain ascii line without escapes. + { + "This is an example.", + []string{"This", " ", "is", " ", "an", " ", "example."}, + }, + // A plain wide line without escapes. + { + "一只敏捷的狐狸跳过了一只懒狗。", + []string{"一", "只", "敏", "捷", "的", "狐", "狸", "跳", "过", + "了", "一", "只", "懒", "狗", "。"}, + }, + // A complex sentence. 
+ { + "This is a 'complex' example, where 一只 and English 混合了。", + []string{"This", " ", "is", " ", "a", " ", "'complex'", " ", "example,", + " ", "where", " ", "一", "只", " ", "and", " ", "English", " ", "混", + "合", "了", "。"}, + }, + } + + for i, tc := range cases { + chunks := segmentLine(tc.Input) + if !reflect.DeepEqual(chunks, tc.Output) { + t.Fatalf("Case %d Input:\n\n`%s`\n\nExpected Output:\n\n`[%s]`\n\nActual Output:\n\n`[%s]`\n\n", + i, tc.Input, strings.Join(tc.Output, ", "), strings.Join(chunks, ", ")) + } + } +} |