本站源代码
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

93 lines
2.5KB

  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package analysis
  15. import (
  16. "bytes"
  17. "unicode/utf8"
  18. )
  // DeleteRune removes the rune at index pos from in and returns the
  // shortened slice.
  //
  // The removal happens in place: the runes after pos are shifted one slot to
  // the left inside in's backing array, so the caller's slice contents are
  // modified and the result shares storage with in.
  //
  // If pos is outside [0, len(in)) the input is returned unchanged.
  func DeleteRune(in []rune, pos int) []rune {
  	// Guard both ends: a negative pos would panic when slicing in[pos:].
  	if pos < 0 || pos >= len(in) {
  		return in
  	}
  	copy(in[pos:], in[pos+1:])
  	return in[:len(in)-1]
  }
  // InsertRune returns a newly allocated slice holding the runes of in with r
  // placed at index pos; the runes originally at pos and beyond follow it.
  // The input slice is not modified. pos is expected to lie in [0, len(in)];
  // an out-of-range pos results in a runtime panic.
  func InsertRune(in []rune, pos int, r rune) []rune {
  	// reserve exactly one extra slot so none of the appends reallocate
  	out := make([]rune, 0, len(in)+1)
  	// runes ahead of the insertion point
  	out = append(out, in[:pos]...)
  	// the new rune itself
  	out = append(out, r)
  	// everything that used to sit at pos or later
  	return append(out, in[pos:]...)
  }
  // BuildTermFromRunesOptimistic builds a UTF-8 term from the provided runes
  // and optimistically attempts to encode it into the provided buffer.
  //
  // Encoding starts in buf (using its length, not its capacity). If at any
  // point the next rune would not fit, a new buffer of
  // len(runes)*utf8.UTFMax bytes is allocated, the bytes encoded so far are
  // copied over, and encoding continues there. The returned slice therefore
  // aliases buf when everything fit and is a fresh allocation otherwise.
  //
  // This should be used in cases where the new term is frequently the same
  // length or shorter than the original term (in number of bytes).
  //
  // Invalid runes are encoded as utf8.RuneError, matching utf8.EncodeRune.
  func BuildTermFromRunesOptimistic(buf []byte, runes []rune) []byte {
  	rv := buf
  	used := 0
  	for _, r := range runes {
  		nextLen := utf8.RuneLen(r)
  		if nextLen < 0 {
  			// RuneLen reports -1 for invalid runes, but EncodeRune still
  			// writes the replacement character; reserve room for it so a
  			// short buffer triggers a reallocation instead of a panic.
  			nextLen = utf8.RuneLen(utf8.RuneError)
  		}
  		if used+nextLen > len(rv) {
  			// the supplied buffer is too small: switch to a worst-case
  			// sized buffer, keeping the work already done
  			grown := make([]byte, len(runes)*utf8.UTFMax)
  			copy(grown, rv[:used])
  			rv = grown
  		}
  		used += utf8.EncodeRune(rv[used:], r)
  	}
  	return rv[:used]
  }
  60. func BuildTermFromRunes(runes []rune) []byte {
  61. return BuildTermFromRunesOptimistic(make([]byte, len(runes)*utf8.UTFMax), runes)
  62. }
  63. func TruncateRunes(input []byte, num int) []byte {
  64. runes := bytes.Runes(input)
  65. runes = runes[:len(runes)-num]
  66. out := BuildTermFromRunes(runes)
  67. return out
  68. }
  // RunesEndsWith reports whether the rune slice input ends with the runes of
  // suffix. The comparison is rune-by-rune; an empty suffix always matches,
  // and a suffix longer than input never does.
  func RunesEndsWith(input []rune, suffix string) bool {
  	want := []rune(suffix)
  	start := len(input) - len(want)
  	if start < 0 {
  		// suffix has more runes than input
  		return false
  	}
  	tail := input[start:]
  	for i, r := range want {
  		if tail[i] != r {
  			return false
  		}
  	}
  	return true
  }
上海开阖软件有限公司 沪ICP备12045867号-1