本站源代码
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

557 lines
17KB

  1. // Copyright (c) 2012-2016 The go-diff authors. All rights reserved.
  2. // https://github.com/sergi/go-diff
  3. // See the included LICENSE file for license details.
  4. //
  5. // go-diff is a Go implementation of Google's Diff, Match, and Patch library
  6. // Original library is Copyright (c) 2006 Google Inc.
  7. // http://code.google.com/p/google-diff-match-patch/
  8. package diffmatchpatch
  9. import (
  10. "bytes"
  11. "errors"
  12. "math"
  13. "net/url"
  14. "regexp"
  15. "strconv"
  16. "strings"
  17. )
  18. // Patch represents one patch operation.
  19. type Patch struct {
  20. diffs []Diff
  21. Start1 int
  22. Start2 int
  23. Length1 int
  24. Length2 int
  25. }
  26. // String emulates GNU diff's format.
  27. // Header: @@ -382,8 +481,9 @@
  28. // Indices are printed as 1-based, not 0-based.
  29. func (p *Patch) String() string {
  30. var coords1, coords2 string
  31. if p.Length1 == 0 {
  32. coords1 = strconv.Itoa(p.Start1) + ",0"
  33. } else if p.Length1 == 1 {
  34. coords1 = strconv.Itoa(p.Start1 + 1)
  35. } else {
  36. coords1 = strconv.Itoa(p.Start1+1) + "," + strconv.Itoa(p.Length1)
  37. }
  38. if p.Length2 == 0 {
  39. coords2 = strconv.Itoa(p.Start2) + ",0"
  40. } else if p.Length2 == 1 {
  41. coords2 = strconv.Itoa(p.Start2 + 1)
  42. } else {
  43. coords2 = strconv.Itoa(p.Start2+1) + "," + strconv.Itoa(p.Length2)
  44. }
  45. var text bytes.Buffer
  46. _, _ = text.WriteString("@@ -" + coords1 + " +" + coords2 + " @@\n")
  47. // Escape the body of the patch with %xx notation.
  48. for _, aDiff := range p.diffs {
  49. switch aDiff.Type {
  50. case DiffInsert:
  51. _, _ = text.WriteString("+")
  52. case DiffDelete:
  53. _, _ = text.WriteString("-")
  54. case DiffEqual:
  55. _, _ = text.WriteString(" ")
  56. }
  57. _, _ = text.WriteString(strings.Replace(url.QueryEscape(aDiff.Text), "+", " ", -1))
  58. _, _ = text.WriteString("\n")
  59. }
  60. return unescaper.Replace(text.String())
  61. }
  62. // PatchAddContext increases the context until it is unique, but doesn't let the pattern expand beyond MatchMaxBits.
  63. func (dmp *DiffMatchPatch) PatchAddContext(patch Patch, text string) Patch {
  64. if len(text) == 0 {
  65. return patch
  66. }
  67. pattern := text[patch.Start2 : patch.Start2+patch.Length1]
  68. padding := 0
  69. // Look for the first and last matches of pattern in text. If two different matches are found, increase the pattern length.
  70. for strings.Index(text, pattern) != strings.LastIndex(text, pattern) &&
  71. len(pattern) < dmp.MatchMaxBits-2*dmp.PatchMargin {
  72. padding += dmp.PatchMargin
  73. maxStart := max(0, patch.Start2-padding)
  74. minEnd := min(len(text), patch.Start2+patch.Length1+padding)
  75. pattern = text[maxStart:minEnd]
  76. }
  77. // Add one chunk for good luck.
  78. padding += dmp.PatchMargin
  79. // Add the prefix.
  80. prefix := text[max(0, patch.Start2-padding):patch.Start2]
  81. if len(prefix) != 0 {
  82. patch.diffs = append([]Diff{Diff{DiffEqual, prefix}}, patch.diffs...)
  83. }
  84. // Add the suffix.
  85. suffix := text[patch.Start2+patch.Length1 : min(len(text), patch.Start2+patch.Length1+padding)]
  86. if len(suffix) != 0 {
  87. patch.diffs = append(patch.diffs, Diff{DiffEqual, suffix})
  88. }
  89. // Roll back the start points.
  90. patch.Start1 -= len(prefix)
  91. patch.Start2 -= len(prefix)
  92. // Extend the lengths.
  93. patch.Length1 += len(prefix) + len(suffix)
  94. patch.Length2 += len(prefix) + len(suffix)
  95. return patch
  96. }
  97. // PatchMake computes a list of patches.
  98. func (dmp *DiffMatchPatch) PatchMake(opt ...interface{}) []Patch {
  99. if len(opt) == 1 {
  100. diffs, _ := opt[0].([]Diff)
  101. text1 := dmp.DiffText1(diffs)
  102. return dmp.PatchMake(text1, diffs)
  103. } else if len(opt) == 2 {
  104. text1 := opt[0].(string)
  105. switch t := opt[1].(type) {
  106. case string:
  107. diffs := dmp.DiffMain(text1, t, true)
  108. if len(diffs) > 2 {
  109. diffs = dmp.DiffCleanupSemantic(diffs)
  110. diffs = dmp.DiffCleanupEfficiency(diffs)
  111. }
  112. return dmp.PatchMake(text1, diffs)
  113. case []Diff:
  114. return dmp.patchMake2(text1, t)
  115. }
  116. } else if len(opt) == 3 {
  117. return dmp.PatchMake(opt[0], opt[2])
  118. }
  119. return []Patch{}
  120. }
  121. // patchMake2 computes a list of patches to turn text1 into text2.
  122. // text2 is not provided, diffs are the delta between text1 and text2.
  123. func (dmp *DiffMatchPatch) patchMake2(text1 string, diffs []Diff) []Patch {
  124. // Check for null inputs not needed since null can't be passed in C#.
  125. patches := []Patch{}
  126. if len(diffs) == 0 {
  127. return patches // Get rid of the null case.
  128. }
  129. patch := Patch{}
  130. charCount1 := 0 // Number of characters into the text1 string.
  131. charCount2 := 0 // Number of characters into the text2 string.
  132. // Start with text1 (prepatchText) and apply the diffs until we arrive at text2 (postpatchText). We recreate the patches one by one to determine context info.
  133. prepatchText := text1
  134. postpatchText := text1
  135. for i, aDiff := range diffs {
  136. if len(patch.diffs) == 0 && aDiff.Type != DiffEqual {
  137. // A new patch starts here.
  138. patch.Start1 = charCount1
  139. patch.Start2 = charCount2
  140. }
  141. switch aDiff.Type {
  142. case DiffInsert:
  143. patch.diffs = append(patch.diffs, aDiff)
  144. patch.Length2 += len(aDiff.Text)
  145. postpatchText = postpatchText[:charCount2] +
  146. aDiff.Text + postpatchText[charCount2:]
  147. case DiffDelete:
  148. patch.Length1 += len(aDiff.Text)
  149. patch.diffs = append(patch.diffs, aDiff)
  150. postpatchText = postpatchText[:charCount2] + postpatchText[charCount2+len(aDiff.Text):]
  151. case DiffEqual:
  152. if len(aDiff.Text) <= 2*dmp.PatchMargin &&
  153. len(patch.diffs) != 0 && i != len(diffs)-1 {
  154. // Small equality inside a patch.
  155. patch.diffs = append(patch.diffs, aDiff)
  156. patch.Length1 += len(aDiff.Text)
  157. patch.Length2 += len(aDiff.Text)
  158. }
  159. if len(aDiff.Text) >= 2*dmp.PatchMargin {
  160. // Time for a new patch.
  161. if len(patch.diffs) != 0 {
  162. patch = dmp.PatchAddContext(patch, prepatchText)
  163. patches = append(patches, patch)
  164. patch = Patch{}
  165. // Unlike Unidiff, our patch lists have a rolling context. http://code.google.com/p/google-diff-match-patch/wiki/Unidiff Update prepatch text & pos to reflect the application of the just completed patch.
  166. prepatchText = postpatchText
  167. charCount1 = charCount2
  168. }
  169. }
  170. }
  171. // Update the current character count.
  172. if aDiff.Type != DiffInsert {
  173. charCount1 += len(aDiff.Text)
  174. }
  175. if aDiff.Type != DiffDelete {
  176. charCount2 += len(aDiff.Text)
  177. }
  178. }
  179. // Pick up the leftover patch if not empty.
  180. if len(patch.diffs) != 0 {
  181. patch = dmp.PatchAddContext(patch, prepatchText)
  182. patches = append(patches, patch)
  183. }
  184. return patches
  185. }
  186. // PatchDeepCopy returns an array that is identical to a given an array of patches.
  187. func (dmp *DiffMatchPatch) PatchDeepCopy(patches []Patch) []Patch {
  188. patchesCopy := []Patch{}
  189. for _, aPatch := range patches {
  190. patchCopy := Patch{}
  191. for _, aDiff := range aPatch.diffs {
  192. patchCopy.diffs = append(patchCopy.diffs, Diff{
  193. aDiff.Type,
  194. aDiff.Text,
  195. })
  196. }
  197. patchCopy.Start1 = aPatch.Start1
  198. patchCopy.Start2 = aPatch.Start2
  199. patchCopy.Length1 = aPatch.Length1
  200. patchCopy.Length2 = aPatch.Length2
  201. patchesCopy = append(patchesCopy, patchCopy)
  202. }
  203. return patchesCopy
  204. }
  205. // PatchApply merges a set of patches onto the text. Returns a patched text, as well as an array of true/false values indicating which patches were applied.
  206. func (dmp *DiffMatchPatch) PatchApply(patches []Patch, text string) (string, []bool) {
  207. if len(patches) == 0 {
  208. return text, []bool{}
  209. }
  210. // Deep copy the patches so that no changes are made to originals.
  211. patches = dmp.PatchDeepCopy(patches)
  212. nullPadding := dmp.PatchAddPadding(patches)
  213. text = nullPadding + text + nullPadding
  214. patches = dmp.PatchSplitMax(patches)
  215. x := 0
  216. // delta keeps track of the offset between the expected and actual location of the previous patch. If there are patches expected at positions 10 and 20, but the first patch was found at 12, delta is 2 and the second patch has an effective expected position of 22.
  217. delta := 0
  218. results := make([]bool, len(patches))
  219. for _, aPatch := range patches {
  220. expectedLoc := aPatch.Start2 + delta
  221. text1 := dmp.DiffText1(aPatch.diffs)
  222. var startLoc int
  223. endLoc := -1
  224. if len(text1) > dmp.MatchMaxBits {
  225. // PatchSplitMax will only provide an oversized pattern in the case of a monster delete.
  226. startLoc = dmp.MatchMain(text, text1[:dmp.MatchMaxBits], expectedLoc)
  227. if startLoc != -1 {
  228. endLoc = dmp.MatchMain(text,
  229. text1[len(text1)-dmp.MatchMaxBits:], expectedLoc+len(text1)-dmp.MatchMaxBits)
  230. if endLoc == -1 || startLoc >= endLoc {
  231. // Can't find valid trailing context. Drop this patch.
  232. startLoc = -1
  233. }
  234. }
  235. } else {
  236. startLoc = dmp.MatchMain(text, text1, expectedLoc)
  237. }
  238. if startLoc == -1 {
  239. // No match found. :(
  240. results[x] = false
  241. // Subtract the delta for this failed patch from subsequent patches.
  242. delta -= aPatch.Length2 - aPatch.Length1
  243. } else {
  244. // Found a match. :)
  245. results[x] = true
  246. delta = startLoc - expectedLoc
  247. var text2 string
  248. if endLoc == -1 {
  249. text2 = text[startLoc:int(math.Min(float64(startLoc+len(text1)), float64(len(text))))]
  250. } else {
  251. text2 = text[startLoc:int(math.Min(float64(endLoc+dmp.MatchMaxBits), float64(len(text))))]
  252. }
  253. if text1 == text2 {
  254. // Perfect match, just shove the Replacement text in.
  255. text = text[:startLoc] + dmp.DiffText2(aPatch.diffs) + text[startLoc+len(text1):]
  256. } else {
  257. // Imperfect match. Run a diff to get a framework of equivalent indices.
  258. diffs := dmp.DiffMain(text1, text2, false)
  259. if len(text1) > dmp.MatchMaxBits && float64(dmp.DiffLevenshtein(diffs))/float64(len(text1)) > dmp.PatchDeleteThreshold {
  260. // The end points match, but the content is unacceptably bad.
  261. results[x] = false
  262. } else {
  263. diffs = dmp.DiffCleanupSemanticLossless(diffs)
  264. index1 := 0
  265. for _, aDiff := range aPatch.diffs {
  266. if aDiff.Type != DiffEqual {
  267. index2 := dmp.DiffXIndex(diffs, index1)
  268. if aDiff.Type == DiffInsert {
  269. // Insertion
  270. text = text[:startLoc+index2] + aDiff.Text + text[startLoc+index2:]
  271. } else if aDiff.Type == DiffDelete {
  272. // Deletion
  273. startIndex := startLoc + index2
  274. text = text[:startIndex] +
  275. text[startIndex+dmp.DiffXIndex(diffs, index1+len(aDiff.Text))-index2:]
  276. }
  277. }
  278. if aDiff.Type != DiffDelete {
  279. index1 += len(aDiff.Text)
  280. }
  281. }
  282. }
  283. }
  284. }
  285. x++
  286. }
  287. // Strip the padding off.
  288. text = text[len(nullPadding) : len(nullPadding)+(len(text)-2*len(nullPadding))]
  289. return text, results
  290. }
  291. // PatchAddPadding adds some padding on text start and end so that edges can match something.
  292. // Intended to be called only from within patchApply.
  293. func (dmp *DiffMatchPatch) PatchAddPadding(patches []Patch) string {
  294. paddingLength := dmp.PatchMargin
  295. nullPadding := ""
  296. for x := 1; x <= paddingLength; x++ {
  297. nullPadding += string(x)
  298. }
  299. // Bump all the patches forward.
  300. for i := range patches {
  301. patches[i].Start1 += paddingLength
  302. patches[i].Start2 += paddingLength
  303. }
  304. // Add some padding on start of first diff.
  305. if len(patches[0].diffs) == 0 || patches[0].diffs[0].Type != DiffEqual {
  306. // Add nullPadding equality.
  307. patches[0].diffs = append([]Diff{Diff{DiffEqual, nullPadding}}, patches[0].diffs...)
  308. patches[0].Start1 -= paddingLength // Should be 0.
  309. patches[0].Start2 -= paddingLength // Should be 0.
  310. patches[0].Length1 += paddingLength
  311. patches[0].Length2 += paddingLength
  312. } else if paddingLength > len(patches[0].diffs[0].Text) {
  313. // Grow first equality.
  314. extraLength := paddingLength - len(patches[0].diffs[0].Text)
  315. patches[0].diffs[0].Text = nullPadding[len(patches[0].diffs[0].Text):] + patches[0].diffs[0].Text
  316. patches[0].Start1 -= extraLength
  317. patches[0].Start2 -= extraLength
  318. patches[0].Length1 += extraLength
  319. patches[0].Length2 += extraLength
  320. }
  321. // Add some padding on end of last diff.
  322. last := len(patches) - 1
  323. if len(patches[last].diffs) == 0 || patches[last].diffs[len(patches[last].diffs)-1].Type != DiffEqual {
  324. // Add nullPadding equality.
  325. patches[last].diffs = append(patches[last].diffs, Diff{DiffEqual, nullPadding})
  326. patches[last].Length1 += paddingLength
  327. patches[last].Length2 += paddingLength
  328. } else if paddingLength > len(patches[last].diffs[len(patches[last].diffs)-1].Text) {
  329. // Grow last equality.
  330. lastDiff := patches[last].diffs[len(patches[last].diffs)-1]
  331. extraLength := paddingLength - len(lastDiff.Text)
  332. patches[last].diffs[len(patches[last].diffs)-1].Text += nullPadding[:extraLength]
  333. patches[last].Length1 += extraLength
  334. patches[last].Length2 += extraLength
  335. }
  336. return nullPadding
  337. }
  338. // PatchSplitMax looks through the patches and breaks up any which are longer than the maximum limit of the match algorithm.
  339. // Intended to be called only from within patchApply.
  340. func (dmp *DiffMatchPatch) PatchSplitMax(patches []Patch) []Patch {
  341. patchSize := dmp.MatchMaxBits
  342. for x := 0; x < len(patches); x++ {
  343. if patches[x].Length1 <= patchSize {
  344. continue
  345. }
  346. bigpatch := patches[x]
  347. // Remove the big old patch.
  348. patches = append(patches[:x], patches[x+1:]...)
  349. x--
  350. Start1 := bigpatch.Start1
  351. Start2 := bigpatch.Start2
  352. precontext := ""
  353. for len(bigpatch.diffs) != 0 {
  354. // Create one of several smaller patches.
  355. patch := Patch{}
  356. empty := true
  357. patch.Start1 = Start1 - len(precontext)
  358. patch.Start2 = Start2 - len(precontext)
  359. if len(precontext) != 0 {
  360. patch.Length1 = len(precontext)
  361. patch.Length2 = len(precontext)
  362. patch.diffs = append(patch.diffs, Diff{DiffEqual, precontext})
  363. }
  364. for len(bigpatch.diffs) != 0 && patch.Length1 < patchSize-dmp.PatchMargin {
  365. diffType := bigpatch.diffs[0].Type
  366. diffText := bigpatch.diffs[0].Text
  367. if diffType == DiffInsert {
  368. // Insertions are harmless.
  369. patch.Length2 += len(diffText)
  370. Start2 += len(diffText)
  371. patch.diffs = append(patch.diffs, bigpatch.diffs[0])
  372. bigpatch.diffs = bigpatch.diffs[1:]
  373. empty = false
  374. } else if diffType == DiffDelete && len(patch.diffs) == 1 && patch.diffs[0].Type == DiffEqual && len(diffText) > 2*patchSize {
  375. // This is a large deletion. Let it pass in one chunk.
  376. patch.Length1 += len(diffText)
  377. Start1 += len(diffText)
  378. empty = false
  379. patch.diffs = append(patch.diffs, Diff{diffType, diffText})
  380. bigpatch.diffs = bigpatch.diffs[1:]
  381. } else {
  382. // Deletion or equality. Only take as much as we can stomach.
  383. diffText = diffText[:min(len(diffText), patchSize-patch.Length1-dmp.PatchMargin)]
  384. patch.Length1 += len(diffText)
  385. Start1 += len(diffText)
  386. if diffType == DiffEqual {
  387. patch.Length2 += len(diffText)
  388. Start2 += len(diffText)
  389. } else {
  390. empty = false
  391. }
  392. patch.diffs = append(patch.diffs, Diff{diffType, diffText})
  393. if diffText == bigpatch.diffs[0].Text {
  394. bigpatch.diffs = bigpatch.diffs[1:]
  395. } else {
  396. bigpatch.diffs[0].Text =
  397. bigpatch.diffs[0].Text[len(diffText):]
  398. }
  399. }
  400. }
  401. // Compute the head context for the next patch.
  402. precontext = dmp.DiffText2(patch.diffs)
  403. precontext = precontext[max(0, len(precontext)-dmp.PatchMargin):]
  404. postcontext := ""
  405. // Append the end context for this patch.
  406. if len(dmp.DiffText1(bigpatch.diffs)) > dmp.PatchMargin {
  407. postcontext = dmp.DiffText1(bigpatch.diffs)[:dmp.PatchMargin]
  408. } else {
  409. postcontext = dmp.DiffText1(bigpatch.diffs)
  410. }
  411. if len(postcontext) != 0 {
  412. patch.Length1 += len(postcontext)
  413. patch.Length2 += len(postcontext)
  414. if len(patch.diffs) != 0 && patch.diffs[len(patch.diffs)-1].Type == DiffEqual {
  415. patch.diffs[len(patch.diffs)-1].Text += postcontext
  416. } else {
  417. patch.diffs = append(patch.diffs, Diff{DiffEqual, postcontext})
  418. }
  419. }
  420. if !empty {
  421. x++
  422. patches = append(patches[:x], append([]Patch{patch}, patches[x:]...)...)
  423. }
  424. }
  425. }
  426. return patches
  427. }
  428. // PatchToText takes a list of patches and returns a textual representation.
  429. func (dmp *DiffMatchPatch) PatchToText(patches []Patch) string {
  430. var text bytes.Buffer
  431. for _, aPatch := range patches {
  432. _, _ = text.WriteString(aPatch.String())
  433. }
  434. return text.String()
  435. }
  436. // PatchFromText parses a textual representation of patches and returns a List of Patch objects.
  437. func (dmp *DiffMatchPatch) PatchFromText(textline string) ([]Patch, error) {
  438. patches := []Patch{}
  439. if len(textline) == 0 {
  440. return patches, nil
  441. }
  442. text := strings.Split(textline, "\n")
  443. textPointer := 0
  444. patchHeader := regexp.MustCompile("^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$")
  445. var patch Patch
  446. var sign uint8
  447. var line string
  448. for textPointer < len(text) {
  449. if !patchHeader.MatchString(text[textPointer]) {
  450. return patches, errors.New("Invalid patch string: " + text[textPointer])
  451. }
  452. patch = Patch{}
  453. m := patchHeader.FindStringSubmatch(text[textPointer])
  454. patch.Start1, _ = strconv.Atoi(m[1])
  455. if len(m[2]) == 0 {
  456. patch.Start1--
  457. patch.Length1 = 1
  458. } else if m[2] == "0" {
  459. patch.Length1 = 0
  460. } else {
  461. patch.Start1--
  462. patch.Length1, _ = strconv.Atoi(m[2])
  463. }
  464. patch.Start2, _ = strconv.Atoi(m[3])
  465. if len(m[4]) == 0 {
  466. patch.Start2--
  467. patch.Length2 = 1
  468. } else if m[4] == "0" {
  469. patch.Length2 = 0
  470. } else {
  471. patch.Start2--
  472. patch.Length2, _ = strconv.Atoi(m[4])
  473. }
  474. textPointer++
  475. for textPointer < len(text) {
  476. if len(text[textPointer]) > 0 {
  477. sign = text[textPointer][0]
  478. } else {
  479. textPointer++
  480. continue
  481. }
  482. line = text[textPointer][1:]
  483. line = strings.Replace(line, "+", "%2b", -1)
  484. line, _ = url.QueryUnescape(line)
  485. if sign == '-' {
  486. // Deletion.
  487. patch.diffs = append(patch.diffs, Diff{DiffDelete, line})
  488. } else if sign == '+' {
  489. // Insertion.
  490. patch.diffs = append(patch.diffs, Diff{DiffInsert, line})
  491. } else if sign == ' ' {
  492. // Minor equality.
  493. patch.diffs = append(patch.diffs, Diff{DiffEqual, line})
  494. } else if sign == '@' {
  495. // Start of next patch.
  496. break
  497. } else {
  498. // WTF?
  499. return patches, errors.New("Invalid patch mode '" + string(sign) + "' in: " + string(line))
  500. }
  501. textPointer++
  502. }
  503. patches = append(patches, patch)
  504. }
  505. return patches, nil
  506. }
上海开阖软件有限公司 沪ICP备12045867号-1