本站源代码
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

425 lines
14KB

  1. package merkletrie
  2. // The focus of this difftree implementation is to save time by
  3. // skipping whole directories if their hash is the same in both
  4. // trees.
  5. //
  6. // The diff algorithm implemented here is based on the doubleiter
  7. // type defined in this same package; we will iterate over both
  8. // trees at the same time, while comparing the current noders in
  9. // each iterator. Depending on how they differ we will output the
  10. // corresponding chages and move the iterators further over both
  11. // trees.
  12. //
  13. // The table bellow show all the possible comparison results, along
  14. // with what changes should we produce and how to advance the
  15. // iterators.
  16. //
  17. // The table is implemented by the switches in this function,
  18. // diffTwoNodes, diffTwoNodesSameName and diffTwoDirs.
  19. //
  20. // Many Bothans died to bring us this information, make sure you
  21. // understand the table before modifying this code.
  22. // # Cases
  23. //
  24. // When comparing noders in both trees you will found yourself in
  25. // one of 169 possible cases, but if we ignore moves, we can
  26. // simplify a lot the search space into the following table:
  27. //
  28. // - "-": nothing, no file or directory
  29. // - a<>: an empty file named "a".
  30. // - a<1>: a file named "a", with "1" as its contents.
  31. // - a<2>: a file named "a", with "2" as its contents.
  32. // - a(): an empty dir named "a".
  33. // - a(...): a dir named "a", with some files and/or dirs inside (possibly
  34. // empty).
  35. // - a(;;;): a dir named "a", with some other files and/or dirs inside
  36. // (possibly empty), which different from the ones in "a(...)".
  37. //
  38. // \ to - a<> a<1> a<2> a() a(...) a(;;;)
  39. // from \
  40. // - 00 01 02 03 04 05 06
  41. // a<> 10 11 12 13 14 15 16
  42. // a<1> 20 21 22 23 24 25 26
  43. // a<2> 30 31 32 33 34 35 36
  44. // a() 40 41 42 43 44 45 46
  45. // a(...) 50 51 52 53 54 55 56
  46. // a(;;;) 60 61 62 63 64 65 66
  47. //
  48. // Every (from, to) combination in the table is a special case, but
  49. // some of them can be merged into some more general cases, for
  50. // instance 11 and 22 can be merged into the general case: both
  51. // noders are equal.
  52. //
  53. // Here is a full list of all the cases that are similar and how to
  54. // merge them together into more general cases. Each general case
  55. // is labeled with an uppercase letter for further reference, and it
  56. // is followed by the pseudocode of the checks you have to perfrom
  57. // on both noders to see if you are in such a case, the actions to
  58. // perform (i.e. what changes to output) and how to advance the
  59. // iterators of each tree to continue the comparison process.
  60. //
  61. // ## A. Impossible: 00
  62. //
  63. // ## B. Same thing on both sides: 11, 22, 33, 44, 55, 66
  64. // - check: `SameName() && SameHash()`
  65. // - action: do nothing.
  66. // - advance: `FromNext(); ToNext()`
  67. //
  68. // ### C. To was created: 01, 02, 03, 04, 05, 06
  69. // - check: `DifferentName() && ToBeforeFrom()`
  70. // - action: inserRecursively(to)
  71. // - advance: `ToNext()`
  72. //
  73. // ### D. From was deleted: 10, 20, 30, 40, 50, 60
  74. // - check: `DifferentName() && FromBeforeTo()`
  75. // - action: `DeleteRecursively(from)`
  76. // - advance: `FromNext()`
  77. //
  78. // ### E. Empty file to file with contents: 12, 13
  79. // - check: `SameName() && DifferentHash() && FromIsFile() &&
  80. // ToIsFile() && FromIsEmpty()`
  81. // - action: `modifyFile(from, to)`
  82. // - advance: `FromNext()` or `FromStep()`
  83. //
  84. // ### E'. file with contents to empty file: 21, 31
  85. // - check: `SameName() && DifferentHash() && FromIsFile() &&
  86. // ToIsFile() && ToIsEmpty()`
  87. // - action: `modifyFile(from, to)`
  88. // - advance: `FromNext()` or `FromStep()`
  89. //
  90. // ### F. empty file to empty dir with the same name: 14
  91. // - check: `SameName() && FromIsFile() && FromIsEmpty() &&
  92. // ToIsDir() && ToIsEmpty()`
  93. // - action: `DeleteFile(from); InsertEmptyDir(to)`
  94. // - advance: `FromNext(); ToNext()`
  95. //
  96. // ### F'. empty dir to empty file of the same name: 41
  97. // - check: `SameName() && FromIsDir() && FromIsEmpty &&
  98. // ToIsFile() && ToIsEmpty()`
  99. // - action: `DeleteEmptyDir(from); InsertFile(to)`
  100. // - advance: `FromNext(); ToNext()` or step for any of them.
  101. //
  102. // ### G. empty file to non-empty dir of the same name: 15, 16
  103. // - check: `SameName() && FromIsFile() && ToIsDir() &&
  104. // FromIsEmpty() && ToIsNotEmpty()`
  105. // - action: `DeleteFile(from); InsertDirRecursively(to)`
  106. // - advance: `FromNext(); ToNext()`
  107. //
  108. // ### G'. non-empty dir to empty file of the same name: 51, 61
  109. // - check: `SameName() && FromIsDir() && FromIsNotEmpty() &&
  110. // ToIsFile() && FromIsEmpty()`
  111. // - action: `DeleteDirRecursively(from); InsertFile(to)`
  112. // - advance: `FromNext(); ToNext()`
  113. //
  114. // ### H. modify file contents: 23, 32
  115. // - check: `SameName() && FromIsFile() && ToIsFile() &&
  116. // FromIsNotEmpty() && ToIsNotEmpty()`
  117. // - action: `ModifyFile(from, to)`
  118. // - advance: `FromNext(); ToNext()`
  119. //
  120. // ### I. file with contents to empty dir: 24, 34
  121. // - check: `SameName() && DifferentHash() && FromIsFile() &&
  122. // FromIsNotEmpty() && ToIsDir() && ToIsEmpty()`
  123. // - action: `DeleteFile(from); InsertEmptyDir(to)`
  124. // - advance: `FromNext(); ToNext()`
  125. //
  126. // ### I'. empty dir to file with contents: 42, 43
  127. // - check: `SameName() && DifferentHash() && FromIsDir() &&
  128. // FromIsEmpty() && ToIsFile() && ToIsEmpty()`
  129. // - action: `DeleteDir(from); InsertFile(to)`
  130. // - advance: `FromNext(); ToNext()`
  131. //
  132. // ### J. file with contents to dir with contetns: 25, 26, 35, 36
  133. // - check: `SameName() && DifferentHash() && FromIsFile() &&
  134. // FromIsNotEmpty() && ToIsDir() && ToIsNotEmpty()`
  135. // - action: `DeleteFile(from); InsertDirRecursively(to)`
  136. // - advance: `FromNext(); ToNext()`
  137. //
  138. // ### J'. dir with contetns to file with contents: 52, 62, 53, 63
  139. // - check: `SameName() && DifferentHash() && FromIsDir() &&
  140. // FromIsNotEmpty() && ToIsFile() && ToIsNotEmpty()`
  141. // - action: `DeleteDirRecursively(from); InsertFile(to)`
  142. // - advance: `FromNext(); ToNext()`
  143. //
  144. // ### K. empty dir to dir with contents: 45, 46
  145. // - check: `SameName() && DifferentHash() && FromIsDir() &&
  146. // FromIsEmpty() && ToIsDir() && ToIsNotEmpty()`
  147. // - action: `InsertChildrenRecursively(to)`
  148. // - advance: `FromNext(); ToNext()`
  149. //
  150. // ### K'. dir with contents to empty dir: 54, 64
  151. // - check: `SameName() && DifferentHash() && FromIsDir() &&
  152. // FromIsEmpty() && ToIsDir() && ToIsNotEmpty()`
  153. // - action: `DeleteChildrenRecursively(from)`
  154. // - advance: `FromNext(); ToNext()`
  155. //
  156. // ### L. dir with contents to dir with different contents: 56, 65
  157. // - check: `SameName() && DifferentHash() && FromIsDir() &&
  158. // FromIsNotEmpty() && ToIsDir() && ToIsNotEmpty()`
  159. // - action: nothing
  160. // - advance: `FromStep(); ToStep()`
  161. //
  162. //
  163. // All these cases can be further simplified by a truth table
  164. // reduction process, in which we gather similar checks together to
  165. // make the final code easier to read and understand.
  166. //
  167. // The first 6 columns are the outputs of the checks to perform on
  168. // both noders. I have labeled them 1 to 6, this is what they mean:
  169. //
  170. // 1: SameName()
  171. // 2: SameHash()
  172. // 3: FromIsDir()
  173. // 4: ToIsDir()
  174. // 5: FromIsEmpty()
  175. // 6: ToIsEmpty()
  176. //
  177. // The from and to columns are a fsnoder example of the elements
  178. // that you will find on each tree under the specified comparison
  179. // results (columns 1 to 6).
  180. //
  181. // The type column identifies the case we are into, from the list above.
  182. //
  183. // The type' column identifies the new set of reduced cases, using
  184. // lowercase letters, and they are explained after the table.
  185. //
  186. // The last column is the set of actions and advances for each case.
  187. //
  188. // "---" means impossible except in case of hash collision.
  189. //
  190. // advance meaning:
  191. // - NN: from.Next(); to.Next()
  192. // - SS: from.Step(); to.Step()
  193. //
  194. // 1 2 3 4 5 6 | from | to |type|type'|action ; advance
  195. // ------------+--------+--------+----+------------------------------------
  196. // 0 0 0 0 0 0 | | | | | if !SameName() {
  197. // . | | | | | if FromBeforeTo() {
  198. // . | | | D | d | delete(from); from.Next()
  199. // . | | | | | } else {
  200. // . | | | C | c | insert(to); to.Next()
  201. // . | | | | | }
  202. // 0 1 1 1 1 1 | | | | | }
  203. // 1 0 0 0 0 0 | a<1> | a<2> | H | e | modify(from, to); NN
  204. // 1 0 0 0 0 1 | a<1> | a<> | E' | e | modify(from, to); NN
  205. // 1 0 0 0 1 0 | a<> | a<1> | E | e | modify(from, to); NN
  206. // 1 0 0 0 1 1 | ---- | ---- | | e |
  207. // 1 0 0 1 0 0 | a<1> | a(...) | J | f | delete(from); insert(to); NN
  208. // 1 0 0 1 0 1 | a<1> | a() | I | f | delete(from); insert(to); NN
  209. // 1 0 0 1 1 0 | a<> | a(...) | G | f | delete(from); insert(to); NN
  210. // 1 0 0 1 1 1 | a<> | a() | F | f | delete(from); insert(to); NN
  211. // 1 0 1 0 0 0 | a(...) | a<1> | J' | f | delete(from); insert(to); NN
  212. // 1 0 1 0 0 1 | a(...) | a<> | G' | f | delete(from); insert(to); NN
  213. // 1 0 1 0 1 0 | a() | a<1> | I' | f | delete(from); insert(to); NN
  214. // 1 0 1 0 1 1 | a() | a<> | F' | f | delete(from); insert(to); NN
  215. // 1 0 1 1 0 0 | a(...) | a(;;;) | L | g | nothing; SS
  216. // 1 0 1 1 0 1 | a(...) | a() | K' | h | deleteChidren(from); NN
  217. // 1 0 1 1 1 0 | a() | a(...) | K | i | insertChildren(to); NN
  218. // 1 0 1 1 1 1 | ---- | ---- | | |
  219. // 1 1 0 0 0 0 | a<1> | a<1> | B | b | nothing; NN
  220. // 1 1 0 0 0 1 | ---- | ---- | | b |
  221. // 1 1 0 0 1 0 | ---- | ---- | | b |
  222. // 1 1 0 0 1 1 | a<> | a<> | B | b | nothing; NN
  223. // 1 1 0 1 0 0 | ---- | ---- | | b |
  224. // 1 1 0 1 0 1 | ---- | ---- | | b |
  225. // 1 1 0 1 1 0 | ---- | ---- | | b |
  226. // 1 1 0 1 1 1 | ---- | ---- | | b |
  227. // 1 1 1 0 0 0 | ---- | ---- | | b |
  228. // 1 1 1 0 0 1 | ---- | ---- | | b |
  229. // 1 1 1 0 1 0 | ---- | ---- | | b |
  230. // 1 1 1 0 1 1 | ---- | ---- | | b |
  231. // 1 1 1 1 0 0 | a(...) | a(...) | B | b | nothing; NN
  232. // 1 1 1 1 0 1 | ---- | ---- | | b |
  233. // 1 1 1 1 1 0 | ---- | ---- | | b |
  234. // 1 1 1 1 1 1 | a() | a() | B | b | nothing; NN
  235. //
  236. // c and d:
  237. // if !SameName()
  238. // d if FromBeforeTo()
  239. // c else
  240. // b: SameName) && sameHash()
  241. // e: SameName() && !sameHash() && BothAreFiles()
  242. // f: SameName() && !sameHash() && FileAndDir()
  243. // g: SameName() && !sameHash() && BothAreDirs() && NoneIsEmpty
  244. // i: SameName() && !sameHash() && BothAreDirs() && FromIsEmpty
  245. // h: else of i
  246. import (
  247. "context"
  248. "errors"
  249. "fmt"
  250. "gopkg.in/src-d/go-git.v4/utils/merkletrie/noder"
  251. )
  252. var (
  253. ErrCanceled = errors.New("operation canceled")
  254. )
  255. // DiffTree calculates the list of changes between two merkletries. It
  256. // uses the provided hashEqual callback to compare noders.
  257. func DiffTree(fromTree, toTree noder.Noder,
  258. hashEqual noder.Equal) (Changes, error) {
  259. return DiffTreeContext(context.Background(), fromTree, toTree, hashEqual)
  260. }
  261. // DiffTree calculates the list of changes between two merkletries. It
  262. // uses the provided hashEqual callback to compare noders.
  263. // Error will be returned if context expires
  264. // Provided context must be non nil
  265. func DiffTreeContext(ctx context.Context, fromTree, toTree noder.Noder,
  266. hashEqual noder.Equal) (Changes, error) {
  267. ret := NewChanges()
  268. ii, err := newDoubleIter(fromTree, toTree, hashEqual)
  269. if err != nil {
  270. return nil, err
  271. }
  272. for {
  273. select {
  274. case <-ctx.Done():
  275. return nil, ErrCanceled
  276. default:
  277. }
  278. from := ii.from.current
  279. to := ii.to.current
  280. switch r := ii.remaining(); r {
  281. case noMoreNoders:
  282. return ret, nil
  283. case onlyFromRemains:
  284. if err = ret.AddRecursiveDelete(from); err != nil {
  285. return nil, err
  286. }
  287. if err = ii.nextFrom(); err != nil {
  288. return nil, err
  289. }
  290. case onlyToRemains:
  291. if err = ret.AddRecursiveInsert(to); err != nil {
  292. return nil, err
  293. }
  294. if err = ii.nextTo(); err != nil {
  295. return nil, err
  296. }
  297. case bothHaveNodes:
  298. if err = diffNodes(&ret, ii); err != nil {
  299. return nil, err
  300. }
  301. default:
  302. panic(fmt.Sprintf("unknown remaining value: %d", r))
  303. }
  304. }
  305. }
  306. func diffNodes(changes *Changes, ii *doubleIter) error {
  307. from := ii.from.current
  308. to := ii.to.current
  309. var err error
  310. // compare their full paths as strings
  311. switch from.Compare(to) {
  312. case -1:
  313. if err = changes.AddRecursiveDelete(from); err != nil {
  314. return err
  315. }
  316. if err = ii.nextFrom(); err != nil {
  317. return err
  318. }
  319. case 1:
  320. if err = changes.AddRecursiveInsert(to); err != nil {
  321. return err
  322. }
  323. if err = ii.nextTo(); err != nil {
  324. return err
  325. }
  326. default:
  327. if err := diffNodesSameName(changes, ii); err != nil {
  328. return err
  329. }
  330. }
  331. return nil
  332. }
  333. func diffNodesSameName(changes *Changes, ii *doubleIter) error {
  334. from := ii.from.current
  335. to := ii.to.current
  336. status, err := ii.compare()
  337. if err != nil {
  338. return err
  339. }
  340. switch {
  341. case status.sameHash:
  342. // do nothing
  343. if err = ii.nextBoth(); err != nil {
  344. return err
  345. }
  346. case status.bothAreFiles:
  347. changes.Add(NewModify(from, to))
  348. if err = ii.nextBoth(); err != nil {
  349. return err
  350. }
  351. case status.fileAndDir:
  352. if err = changes.AddRecursiveDelete(from); err != nil {
  353. return err
  354. }
  355. if err = changes.AddRecursiveInsert(to); err != nil {
  356. return err
  357. }
  358. if err = ii.nextBoth(); err != nil {
  359. return err
  360. }
  361. case status.bothAreDirs:
  362. if err = diffDirs(changes, ii); err != nil {
  363. return err
  364. }
  365. default:
  366. return fmt.Errorf("bad status from double iterator")
  367. }
  368. return nil
  369. }
  370. func diffDirs(changes *Changes, ii *doubleIter) error {
  371. from := ii.from.current
  372. to := ii.to.current
  373. status, err := ii.compare()
  374. if err != nil {
  375. return err
  376. }
  377. switch {
  378. case status.fromIsEmptyDir:
  379. if err = changes.AddRecursiveInsert(to); err != nil {
  380. return err
  381. }
  382. if err = ii.nextBoth(); err != nil {
  383. return err
  384. }
  385. case status.toIsEmptyDir:
  386. if err = changes.AddRecursiveDelete(from); err != nil {
  387. return err
  388. }
  389. if err = ii.nextBoth(); err != nil {
  390. return err
  391. }
  392. case !status.fromIsEmptyDir && !status.toIsEmptyDir:
  393. // do nothing
  394. if err = ii.stepBoth(); err != nil {
  395. return err
  396. }
  397. default:
  398. return fmt.Errorf("both dirs are empty but has different hash")
  399. }
  400. return nil
  401. }
上海开阖软件有限公司 沪ICP备12045867号-1