stringutil.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835
  1. /*
  2. * Public Domain Software
  3. *
  4. * I (Matthias Ladkau) am the author of the source code in this file.
  5. * I have placed the source code in this file in the public domain.
  6. *
  7. * For further information see: http://creativecommons.org/publicdomain/zero/1.0/
  8. */
  9. /*
  10. Package stringutil contains common function for string operations.
  11. */
  12. package stringutil
  13. import (
  14. "bytes"
  15. "crypto/md5"
  16. "encoding/json"
  17. "fmt"
  18. "regexp"
  19. "sort"
  20. "strconv"
  21. "strings"
  22. "unicode"
  23. "unicode/utf8"
  24. )
  25. /*
  26. LongestCommonPrefix determines the longest common prefix of a given list of strings.
  27. */
  28. func LongestCommonPrefix(s []string) string {
  29. var res string
  30. commonPrefix := func(str1, str2 string) string {
  31. var buf bytes.Buffer
  32. rs2 := StringToRuneSlice(str2)
  33. rs2len := len(rs2)
  34. for i, c := range str1 {
  35. if i >= rs2len {
  36. break
  37. } else if c == rs2[i] {
  38. buf.WriteRune(c)
  39. }
  40. }
  41. return buf.String()
  42. }
  43. lens := len(s)
  44. if lens > 0 {
  45. res = s[0]
  46. for i := 1; i < lens; i++ {
  47. res = commonPrefix(res, s[i])
  48. }
  49. }
  50. return res
  51. }
  52. /*
  53. PrintStringTable prints a given list of strings as table with c columns.
  54. */
  55. func PrintStringTable(ss []string, c int) string {
  56. var ret bytes.Buffer
  57. if c < 1 {
  58. return ""
  59. }
  60. // Determine max widths of columns
  61. maxWidths := make(map[int]int)
  62. for i, s := range ss {
  63. col := i % c
  64. if l := utf8.RuneCountInString(s); l > maxWidths[col] {
  65. maxWidths[col] = l
  66. }
  67. }
  68. for i, s := range ss {
  69. col := i % c
  70. if i < len(ss)-1 {
  71. var formatString string
  72. if col != c-1 {
  73. formatString = fmt.Sprintf("%%-%vv ", maxWidths[col])
  74. } else {
  75. formatString = "%v"
  76. }
  77. ret.WriteString(fmt.Sprintf(formatString, s))
  78. } else {
  79. ret.WriteString(fmt.Sprintln(s))
  80. break
  81. }
  82. if col == c-1 {
  83. ret.WriteString(fmt.Sprintln())
  84. }
  85. }
  86. return ret.String()
  87. }
  88. /*
  89. GraphicStringTableSymbols defines how to draw a graphic table.
  90. */
  91. type GraphicStringTableSymbols struct {
  92. BoxHorizontal string
  93. BoxVertical string
  94. BoxMiddle string
  95. BoxCornerTopLeft string
  96. BoxCornerTopRight string
  97. BoxCornerBottomLeft string
  98. BoxCornerBottomRight string
  99. BoxTopMiddle string
  100. BoxLeftMiddle string
  101. BoxRightMiddle string
  102. BoxBottomMiddle string
  103. }
  104. /*
  105. Standard graphic table drawing definitions.
  106. */
  107. var (
  108. SingleLineTable = &GraphicStringTableSymbols{"─", "│", "┼", "┌", "┐", "└", "┘", "┬", "├", "┤", "┴"}
  109. DoubleLineTable = &GraphicStringTableSymbols{"═", "║", "╬", "╔", "╗", "╚", "╝", "╦", "╠", "╣", "╩"}
  110. SingleDoubleLineTable = &GraphicStringTableSymbols{"═", "│", "╪", "╒", "╕", "╘", "╛", "╤", "╞", "╡", "╧"}
  111. DoubleSingleLineTable = &GraphicStringTableSymbols{"─", "║", "╫", "╓", "╖", "╙", "╜", "╥", "╟", "╢", "╨"}
  112. MonoTable = &GraphicStringTableSymbols{"#", "#", "#", "#", "#", "#", "#", "#", "#", "#", "#"}
  113. )
  114. /*
  115. PrintGraphicStringTable prints a given list of strings in a graphic table
  116. with c columns - creates a header after n rows using syms as drawing symbols.
  117. */
  118. func PrintGraphicStringTable(ss []string, c int, n int, syms *GraphicStringTableSymbols) string {
  119. var topline, bottomline, middleline, ret bytes.Buffer
  120. if c < 1 {
  121. return ""
  122. }
  123. if syms == nil {
  124. syms = MonoTable
  125. }
  126. // Determine max widths of columns
  127. maxWidths := make(map[int]int)
  128. for i, s := range ss {
  129. col := i % c
  130. l := utf8.RuneCountInString(s)
  131. if l > maxWidths[col] {
  132. maxWidths[col] = l
  133. }
  134. }
  135. // Determine total width and create top, middle and bottom line
  136. totalWidth := 1
  137. topline.WriteString(syms.BoxCornerTopLeft)
  138. bottomline.WriteString(syms.BoxCornerBottomLeft)
  139. middleline.WriteString(syms.BoxLeftMiddle)
  140. for i := 0; i < len(maxWidths); i++ {
  141. totalWidth += maxWidths[i] + 2
  142. topline.WriteString(GenerateRollingString(syms.BoxHorizontal, maxWidths[i]+1))
  143. bottomline.WriteString(GenerateRollingString(syms.BoxHorizontal, maxWidths[i]+1))
  144. middleline.WriteString(GenerateRollingString(syms.BoxHorizontal, maxWidths[i]+1))
  145. if i < len(maxWidths)-1 {
  146. topline.WriteString(syms.BoxTopMiddle)
  147. bottomline.WriteString(syms.BoxBottomMiddle)
  148. middleline.WriteString(syms.BoxMiddle)
  149. }
  150. }
  151. topline.WriteString(syms.BoxCornerTopRight)
  152. bottomline.WriteString(syms.BoxCornerBottomRight)
  153. middleline.WriteString(syms.BoxRightMiddle)
  154. // Draw the table
  155. ret.WriteString(topline.String())
  156. ret.WriteString(fmt.Sprintln())
  157. row := 0
  158. for i, s := range ss {
  159. col := i % c
  160. ret.WriteString(syms.BoxVertical)
  161. if i < len(ss)-1 {
  162. formatString := fmt.Sprintf("%%-%vv ", maxWidths[col])
  163. ret.WriteString(fmt.Sprintf(formatString, s))
  164. } else {
  165. formatString := fmt.Sprintf("%%-%vv ", maxWidths[col])
  166. ret.WriteString(fmt.Sprintf(formatString, s))
  167. for col < c-1 && col < len(ss)-1 {
  168. col++
  169. ret.WriteString(syms.BoxVertical)
  170. ret.WriteString(GenerateRollingString(" ", maxWidths[col]))
  171. ret.WriteString(" ")
  172. }
  173. ret.WriteString(syms.BoxVertical)
  174. ret.WriteString(fmt.Sprintln())
  175. break
  176. }
  177. if col == c-1 {
  178. ret.WriteString(syms.BoxVertical)
  179. ret.WriteString(fmt.Sprintln())
  180. row++
  181. if row == n {
  182. ret.WriteString(middleline.String())
  183. ret.WriteString(fmt.Sprintln())
  184. }
  185. }
  186. }
  187. ret.WriteString(bottomline.String())
  188. ret.WriteString(fmt.Sprintln())
  189. return ret.String()
  190. }
  191. /*
  192. PrintCSVTable prints a given list of strings in a CSV table with c
  193. columns.
  194. */
  195. func PrintCSVTable(ss []string, c int) string {
  196. var ret bytes.Buffer
  197. var col int
  198. if c < 1 || len(ss) == 0 {
  199. return ""
  200. }
  201. // Write the table
  202. for i, s := range ss {
  203. col = i % c
  204. ret.WriteString(strings.TrimSpace(fmt.Sprint(s)))
  205. if col == c-1 {
  206. ret.WriteString(fmt.Sprintln())
  207. } else if i < len(ss)-1 {
  208. ret.WriteString(", ")
  209. }
  210. }
  211. if col != c-1 {
  212. ret.WriteString(fmt.Sprintln())
  213. }
  214. return ret.String()
  215. }
  216. /*
  217. RuneSliceToString converts a slice of runes into a string.
  218. */
  219. func RuneSliceToString(buf []rune) string {
  220. var sbuf bytes.Buffer
  221. for _, r := range buf {
  222. fmt.Fprintf(&sbuf, "%c", r)
  223. }
  224. return sbuf.String()
  225. }
  226. /*
  227. StringToRuneSlice converts a string into a slice of runes.
  228. */
  229. func StringToRuneSlice(s string) []rune {
  230. var buf []rune
  231. for _, r := range s {
  232. buf = append(buf, r)
  233. }
  234. return buf
  235. }
  236. /*
  237. Plural returns the string 's' if the parameter is greater than one or
  238. if the parameter is 0.
  239. */
  240. func Plural(l int) string {
  241. if l > 1 || l == 0 {
  242. return "s"
  243. }
  244. return ""
  245. }
  246. /*
  247. GlobParseError describes a failure to parse a glob expression
  248. and gives the offending expression.
  249. */
  250. type GlobParseError struct {
  251. Msg string
  252. Pos int
  253. Glob string
  254. }
  255. /*
  256. Error Returns a string representation of the error.
  257. */
  258. func (e *GlobParseError) Error() string {
  259. return fmt.Sprintf("%s at %d of %s", e.Msg, e.Pos, e.Glob)
  260. }
  261. /*
  262. GlobToRegex converts a given glob expression into a regular expression.
  263. */
  264. func GlobToRegex(glob string) (string, error) {
  265. buf := new(bytes.Buffer)
  266. brackets, braces := 0, 0
  267. n := len(glob)
  268. for i := 0; i < n; i++ {
  269. char := glob[i]
  270. switch char {
  271. case '\\':
  272. // Escapes
  273. i++
  274. if i >= n {
  275. return "", &GlobParseError{"Missing escaped character", i, glob}
  276. }
  277. buf.WriteByte(char)
  278. buf.WriteByte(glob[i])
  279. continue
  280. case '*':
  281. // Wildcard match multiple characters
  282. buf.WriteByte('.')
  283. case '?':
  284. // Wildcard match any single character
  285. buf.WriteByte('.')
  286. continue
  287. case '{':
  288. // Group (always non-capturing)
  289. buf.WriteString("(?:")
  290. braces++
  291. continue
  292. case '}':
  293. // End of group
  294. if braces > 0 {
  295. braces--
  296. buf.WriteByte(')')
  297. continue
  298. }
  299. case '[':
  300. // Character class
  301. if brackets > 0 {
  302. return "", &GlobParseError{"Unclosed character class", i, glob}
  303. }
  304. brackets++
  305. case ']':
  306. // End of character class
  307. brackets = 0
  308. case ',':
  309. // OR in groups
  310. if braces > 0 {
  311. buf.WriteByte('|')
  312. } else {
  313. buf.WriteByte(char)
  314. }
  315. continue
  316. case '^':
  317. // Beginning of line in character classes otherwise normal
  318. // escaped character
  319. if brackets == 0 {
  320. buf.WriteByte('\\')
  321. }
  322. case '!':
  323. // [! is the equivalent of [^ in glob
  324. if brackets > 0 && glob[i-1] == '[' {
  325. buf.WriteByte('^')
  326. } else {
  327. buf.WriteByte('!')
  328. }
  329. continue
  330. case '.', '$', '(', ')', '|', '+':
  331. // Escape all regex characters which are not glob characters
  332. buf.WriteByte('\\')
  333. }
  334. buf.WriteByte(char)
  335. }
  336. if brackets > 0 {
  337. return "", &GlobParseError{"Unclosed character class", n, glob}
  338. } else if braces > 0 {
  339. return "", &GlobParseError{"Unclosed group", n, glob}
  340. }
  341. return buf.String(), nil
  342. }
  343. /*
  344. GlobStartingLiterals gets the first literals of a glob string.
  345. */
  346. func GlobStartingLiterals(glob string) string {
  347. buf := new(bytes.Buffer)
  348. n := len(glob)
  349. for i := 0; i < n; i++ {
  350. char := glob[i]
  351. if char == '\\' || char == '*' || char == '?' ||
  352. char == '{' || char == '[' {
  353. break
  354. }
  355. buf.WriteByte(char)
  356. }
  357. return buf.String()
  358. }
  359. /*
  360. LevenshteinDistance computes the Levenshtein distance between two strings.
  361. */
  362. func LevenshteinDistance(str1, str2 string) int {
  363. if str1 == str2 {
  364. return 0
  365. }
  366. rslice1 := StringToRuneSlice(str1)
  367. rslice2 := StringToRuneSlice(str2)
  368. n, m := len(rslice1), len(rslice2)
  369. if n == 0 {
  370. return m
  371. } else if m == 0 {
  372. return n
  373. }
  374. v0 := make([]int, m+1, m+1)
  375. v1 := make([]int, m+1, m+1)
  376. for i := 0; i <= m; i++ {
  377. v0[i] = i
  378. }
  379. var cost int
  380. for i := 0; i < n; i++ {
  381. v1[0] = i + 1
  382. for j := 0; j < m; j++ {
  383. if rslice1[i] == rslice2[j] {
  384. cost = 0
  385. } else {
  386. cost = 1
  387. }
  388. v1[j+1] = min3(v1[j]+1, v0[j+1]+1, v0[j]+cost)
  389. }
  390. v0, v1 = v1, v0
  391. }
  392. return v0[m]
  393. }
  394. /*
  395. 3 way min for computing the Levenshtein distance.
  396. */
  397. func min3(a, b, c int) int {
  398. ret := a
  399. if b < ret {
  400. ret = b
  401. }
  402. if c < ret {
  403. ret = c
  404. }
  405. return ret
  406. }
  407. /*
  408. VersionStringCompare compares two version strings. Returns: 0 if the strings are
  409. equal; -1 if the first string is smaller; 1 if the first string is greater.
  410. */
  411. func VersionStringCompare(str1, str2 string) int {
  412. val1 := strings.Split(str1, ".")
  413. val2 := strings.Split(str2, ".")
  414. idx := 0
  415. for idx < len(val1) && idx < len(val2) && val1[idx] == val2[idx] {
  416. idx++
  417. }
  418. switch {
  419. case idx < len(val1) && idx < len(val2):
  420. return versionStringPartCompare(val1[idx], val2[idx])
  421. case len(val1) > len(val2):
  422. return 1
  423. case len(val1) < len(val2):
  424. return -1
  425. }
  426. return 0
  427. }
  428. /*
  429. versionStringPartCompare compares two version string parts. Returns: 0 if the
  430. strings are equal; -1 if the first string is smaller; 1 if the first string is
  431. greater.
  432. */
  433. func versionStringPartCompare(str1, str2 string) int {
  434. pat := regexp.MustCompile("^([0-9]+)([\\D].*)?")
  435. res1 := pat.FindStringSubmatch(str1)
  436. res2 := pat.FindStringSubmatch(str2)
  437. switch {
  438. case res1 == nil && res2 == nil:
  439. return strings.Compare(str1, str2)
  440. case res1 == nil && res2 != nil:
  441. return -1
  442. case res1 != nil && res2 == nil:
  443. return 1
  444. }
  445. v1, _ := strconv.Atoi(res1[1])
  446. v2, _ := strconv.Atoi(res2[1])
  447. res := 0
  448. switch {
  449. case v1 > v2:
  450. res = 1
  451. case v1 < v2:
  452. res = -1
  453. }
  454. if res == 0 {
  455. switch {
  456. case res1[2] != "" && res2[2] == "":
  457. return 1
  458. case res1[2] == "" && res2[2] != "":
  459. return -1
  460. case res1[2] != "" && res2[2] != "":
  461. return strings.Compare(res1[2], res2[2])
  462. }
  463. }
  464. return res
  465. }
  466. /*
  467. IsAlphaNumeric checks if a string contains only alpha numerical characters or "_".
  468. */
  469. func IsAlphaNumeric(str string) bool {
  470. ret, _ := regexp.MatchString("^[a-zA-Z0-9_]*$", str)
  471. return ret
  472. }
  473. /*
  474. IsTrueValue checks if a given string is a true value.
  475. */
  476. func IsTrueValue(str string) bool {
  477. str = strings.ToLower(str)
  478. return str == "true" || str == "yes" || str == "on" || str == "ok" ||
  479. str == "1" || str == "active" || str == "enabled"
  480. }
  481. /*
  482. IndexOf returns the index of str in slice or -1 if it does not exist.
  483. */
  484. func IndexOf(str string, slice []string) int {
  485. for i, s := range slice {
  486. if str == s {
  487. return i
  488. }
  489. }
  490. return -1
  491. }
  492. /*
  493. MapKeys returns the keys of a map as a sorted list.
  494. */
  495. func MapKeys(m map[string]interface{}) []string {
  496. ret := make([]string, 0, len(m))
  497. for k := range m {
  498. ret = append(ret, k)
  499. }
  500. sort.Strings(ret)
  501. return ret
  502. }
  503. /*
  504. GenerateRollingString creates a string by repeating a given string pattern.
  505. */
  506. func GenerateRollingString(seq string, size int) string {
  507. var buf bytes.Buffer
  508. rs := StringToRuneSlice(seq)
  509. l := len(rs)
  510. if l == 0 {
  511. return ""
  512. }
  513. for i := 0; i < size; i++ {
  514. buf.WriteRune(rs[i%l])
  515. }
  516. return buf.String()
  517. }
  518. /*
  519. ConvertToString tries to convert a given object into a stable string. This
  520. function can be used to display nested maps.
  521. */
  522. func ConvertToString(v interface{}) string {
  523. if vStringer, ok := v.(fmt.Stringer); ok {
  524. return vStringer.String()
  525. }
  526. if _, err := json.Marshal(v); err != nil {
  527. v = ConvertToJSONMarshalableObject(v)
  528. }
  529. if vString, ok := v.(string); ok {
  530. return vString
  531. } else if res, err := json.Marshal(v); err == nil {
  532. return string(res)
  533. }
  534. return fmt.Sprint(v)
  535. }
  536. /*
  537. ConvertToPrettyString tries to convert a given object into a stable human-readable
  538. string.
  539. */
  540. func ConvertToPrettyString(v interface{}) string {
  541. var res []byte
  542. var err error
  543. if res, err = json.MarshalIndent(v, "", " "); err != nil {
  544. if res, err = json.MarshalIndent(ConvertToJSONMarshalableObject(v), "", " "); err != nil {
  545. res = []byte(fmt.Sprint(v))
  546. }
  547. }
  548. return string(res)
  549. }
  550. /*
  551. ConvertToJSONMarshalableObject converts container contents into objects which
  552. can be converted into JSON strings.
  553. */
  554. func ConvertToJSONMarshalableObject(v interface{}) interface{} {
  555. res := v
  556. if mapContainer, ok := v.(map[interface{}]interface{}); ok {
  557. newRes := make(map[string]interface{})
  558. for mk, mv := range mapContainer {
  559. newRes[ConvertToString(mk)] = ConvertToJSONMarshalableObject(mv)
  560. }
  561. res = newRes
  562. } else if mapList, ok := v.([]interface{}); ok {
  563. newRes := make([]interface{}, len(mapList))
  564. for i, lv := range mapList {
  565. newRes[i] = ConvertToJSONMarshalableObject(lv)
  566. }
  567. res = newRes
  568. }
  569. return res
  570. }
  571. /*
  572. MD5HexString calculates the MD5 sum of a string and returns it as hex string.
  573. */
  574. func MD5HexString(str string) string {
  575. return fmt.Sprintf("%x", md5.Sum([]byte(str)))
  576. }
  577. /*
  578. LengthConstantEquals compares two strings in length-constant time. This
  579. function is deliberately inefficient in that it does not stop at the earliest
  580. possible time. This is to prevent timing attacks when comparing password
  581. hashes.
  582. */
  583. func LengthConstantEquals(str1 []byte, str2 []byte) bool {
  584. diff := len(str1) ^ len(str2)
  585. for i := 0; i < len(str1) && i < len(str2); i++ {
  586. diff |= int(str1[i] ^ str2[i])
  587. }
  588. return diff == 0
  589. }
  590. /*
  591. CamelCaseSplit splits a camel case string into a slice.
  592. */
  593. func CamelCaseSplit(src string) []string {
  594. var result []string
  595. if !utf8.ValidString(src) {
  596. result = []string{src}
  597. } else {
  598. type rType int
  599. const (
  600. undefined rType = iota
  601. lower
  602. upper
  603. digit
  604. other
  605. )
  606. var current, previous rType
  607. var runes [][]rune
  608. for _, r := range src {
  609. if unicode.IsLower(r) {
  610. current = lower
  611. } else if unicode.IsUpper(r) {
  612. current = upper
  613. } else if unicode.IsDigit(r) {
  614. current = digit
  615. } else {
  616. current = other
  617. }
  618. if current == previous {
  619. runes[len(runes)-1] = append(runes[len(runes)-1], r)
  620. } else {
  621. runes = append(runes, []rune{r})
  622. previous = current
  623. }
  624. }
  625. for i := 0; i < len(runes)-1; i++ {
  626. // Detect cases like "ROCKH" "ard" and correct them to
  627. // "ROCK" "Hard"
  628. if unicode.IsUpper(runes[i][0]) && unicode.IsLower(runes[i+1][0]) {
  629. runes[i+1] = append([]rune{runes[i][len(runes[i])-1]}, runes[i+1]...)
  630. runes[i] = runes[i][:len(runes[i])-1]
  631. }
  632. }
  633. for _, s := range runes {
  634. if len(s) > 0 {
  635. result = append(result, string(s))
  636. }
  637. }
  638. }
  639. return result
  640. }
  641. /*
  642. ChunkSplit splits a string into chunks of a defined size. Attempts to only split
  643. at white space characters if spaceSplit is set.
  644. */
  645. func ChunkSplit(s string, size int, spaceSplit bool) []string {
  646. var res []string
  647. var cl, wpos int
  648. if size >= len(s) {
  649. return []string{s}
  650. }
  651. chunk := make([]rune, size)
  652. for _, r := range s {
  653. chunk[cl] = r
  654. cl++
  655. if spaceSplit && unicode.IsSpace(r) {
  656. wpos = cl
  657. }
  658. if cl == size {
  659. if !spaceSplit || wpos == 0 {
  660. res = append(res, string(chunk))
  661. cl = 0
  662. } else {
  663. res = append(res, string(chunk[:wpos]))
  664. copy(chunk, chunk[wpos:])
  665. cl = len(chunk[wpos:])
  666. wpos = 0
  667. }
  668. }
  669. }
  670. if cl > 0 {
  671. res = append(res, string(chunk[:cl]))
  672. }
  673. return res
  674. }