// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Modified for deflate by Klaus Post (c) 2015.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package flate

// emitLiteral writes a literal chunk to the token list.
func emitLiteral(dst *tokens, lit []byte) {
	ol := int(dst.n)
	for i, v := range lit {
		dst.tokens[(i+ol)&maxStoreBlockSize] = token(v)
	}
	dst.n += uint16(len(lit))
}

// emitCopy writes a copy chunk to the token list.
func emitCopy(dst *tokens, offset, length int) {
	dst.tokens[dst.n] = matchToken(uint32(length-3), uint32(offset-minOffsetSize))
	dst.n++
}
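
// Note: DEFLATE matches are at least 3 bytes long at an offset of at least 1,
// so matchToken is handed the length and offset relative to those minima
// (length-3 and offset-minOffsetSize). The same adjustment appears inline in
// the Encode methods below as baseMatchLength and baseMatchOffset.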

type snappyEnc interface {
	Encode(dst *tokens, src []byte)
	Reset()
}

func newSnappy(level int) snappyEnc {
	switch level {
	case 1:
		return &snappyL1{}
	case 2:
		return &snappyL2{snappyGen: snappyGen{cur: maxStoreBlockSize, prev: make([]byte, 0, maxStoreBlockSize)}}
	case 3:
		return &snappyL3{snappyGen: snappyGen{cur: maxStoreBlockSize, prev: make([]byte, 0, maxStoreBlockSize)}}
	case 4:
		return &snappyL4{snappyL3{snappyGen: snappyGen{cur: maxStoreBlockSize, prev: make([]byte, 0, maxStoreBlockSize)}}}
	default:
		panic("invalid level specified")
	}
}
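
// Illustrative sketch (not part of the original file): how the rest of the
// package is expected to drive a snappyEnc. It assumes a tokens value sized
// for one stored block and a []byte block of at most maxStoreBlockSize bytes,
// both as used elsewhere in this package.
//
//	var t tokens
//	enc := newSnappy(2)
//	enc.Encode(&t, block) // fills t with literal/match tokens for block
//	// ... write out t as a DEFLATE block ...
//	enc.Reset() // discard history when the stream is reset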

const (
	tableBits  = 14             // Bits used in the table
	tableSize  = 1 << tableBits // Size of the table
	tableMask  = tableSize - 1  // Mask for table indices. Redundant, but can eliminate bounds checks.
	tableShift = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32.

	baseMatchOffset = 1       // The smallest match offset
	baseMatchLength = 3       // The smallest match length per RFC 1951, section 3.2.5
	maxMatchOffset  = 1 << 15 // The largest match offset
)

func load32(b []byte, i int) uint32 {
	b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line.
	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}

func load64(b []byte, i int) uint64 {
	b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line.
	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}

func hash(u uint32) uint32 {
	return (u * 0x1e35a7bd) >> tableShift
}
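
// Note: hash is a multiplicative hash. Multiplying by 0x1e35a7bd (a constant
// also used by the C++ Snappy implementation) mixes the low input bytes into
// the high bits, and tableShift = 32 - tableBits keeps only the top tableBits
// bits. The result is therefore already smaller than tableSize; the &tableMask
// applied at every table access below is redundant and is only there to help
// the compiler elide bounds checks, as the tableMask comment says.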

// snappyL1 encapsulates level 1 compression
type snappyL1 struct{}

func (e *snappyL1) Reset() {}

func (e *snappyL1) Encode(dst *tokens, src []byte) {
	const (
		inputMargin            = 16 - 1
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)

	// This check isn't in the Snappy implementation, but there, the caller
	// instead of the callee handles this case.
	if len(src) < minNonLiteralBlockSize {
		// We do not fill the token table.
		// This will be picked up by caller.
		dst.n = uint16(len(src))
		return
	}

	// Initialize the hash table.
	//
	// The table element type is uint16, as s < sLimit and sLimit < len(src)
	// and len(src) <= maxStoreBlockSize and maxStoreBlockSize == 65535.
	var table [tableSize]uint16

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	nextHash := hash(load32(src, s))

	for {
		// Copied from the C++ snappy implementation:
		//
		// Heuristic match skipping: If 32 bytes are scanned with no matches
		// found, start looking only at every other byte. If 32 more bytes are
		// scanned (or skipped), look at every third byte, etc.. When a match
		// is found, immediately go back to looking at every byte. This is a
		// small loss (~5% performance, ~0.1% density) for compressible data
		// due to more bookkeeping, but for non-compressible data (such as
		// JPEG) it's a huge win since the compressor quickly "realizes" the
		// data is incompressible and doesn't bother looking for matches
		// everywhere.
		//
		// The "skip" variable keeps track of how many bytes there are since
		// the last match; dividing it by 32 (ie. right-shifting by five) gives
		// the number of bytes to move ahead for each iteration.
		skip := 32

		nextS := s
		candidate := 0
		for {
			s = nextS
			bytesBetweenHashLookups := skip >> 5
			nextS = s + bytesBetweenHashLookups
			skip += bytesBetweenHashLookups
			if nextS > sLimit {
				goto emitRemainder
			}
			candidate = int(table[nextHash&tableMask])
			table[nextHash&tableMask] = uint16(s)
			nextHash = hash(load32(src, nextS))
			// TODO: < should be <=, and add a test for that.
			if s-candidate < maxMatchOffset && load32(src, s) == load32(src, candidate) {
				break
			}
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.
		emitLiteral(dst, src[nextEmit:s])

		// Call emitCopy, and then see if another emitCopy could be our next
		// move. Repeat until we find no match for the input immediately after
		// what was consumed by the last emitCopy call.
		//
		// If we exit this loop normally then we need to call emitLiteral next,
		// though we don't yet know how big the literal will be. We handle that
		// by proceeding to the next iteration of the main loop. We also can
		// exit this loop via goto if we get close to exhausting the input.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.
			base := s

			// Extend the 4-byte match as long as possible.
			//
			// This is an inlined version of Snappy's:
			//	s = extendMatch(src, candidate+4, s+4)
			s += 4
			s1 := base + maxMatchLength
			if s1 > len(src) {
				s1 = len(src)
			}
			a := src[s:s1]
			b := src[candidate+4:]
			b = b[:len(a)]
			l := len(a)
			for i := range a {
				if a[i] != b[i] {
					l = i
					break
				}
			}
			s += l

			// matchToken is flate's equivalent of Snappy's emitCopy.
			dst.tokens[dst.n] = matchToken(uint32(s-base-baseMatchLength), uint32(base-candidate-baseMatchOffset))
			dst.n++
			nextEmit = s
			if s >= sLimit {
				goto emitRemainder
			}

			// We could immediately start working at s now, but to improve
			// compression we first update the hash table at s-1 and at s. If
			// another emitCopy is not our next move, also calculate nextHash
			// at s+1. At least on GOARCH=amd64, these three hash calculations
			// are faster as one load64 call (with some shifts) instead of
			// three load32 calls.
			x := load64(src, s-1)
			prevHash := hash(uint32(x >> 0))
			table[prevHash&tableMask] = uint16(s - 1)
			currHash := hash(uint32(x >> 8))
			candidate = int(table[currHash&tableMask])
			table[currHash&tableMask] = uint16(s)
			// TODO: >= should be >, and add a test for that.
			if s-candidate >= maxMatchOffset || uint32(x>>8) != load32(src, candidate) {
				nextHash = hash(uint32(x >> 16))
				s++
				break
			}
		}
	}

emitRemainder:
	if nextEmit < len(src) {
		emitLiteral(dst, src[nextEmit:])
	}
}

type tableEntry struct {
	val    uint32
	offset int32
}

func load3232(b []byte, i int32) uint32 {
	b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line.
	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}

func load6432(b []byte, i int32) uint64 {
	b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line.
	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}

// snappyGen maintains the table for matches,
// and the previous byte block for level 2.
// This is the generic implementation.
type snappyGen struct {
	prev []byte
	cur  int32
}
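
// Note on the state kept by the level 2-4 encoders: a tableEntry caches both
// the position (offset, expressed relative to the rolling base e.cur) and the
// four bytes (val) that were hashed there, so a candidate can be rejected
// without reloading bytes that may no longer be in the current block.
// snappyGen.prev keeps a copy of the previous block so matches can reach back
// across block boundaries, and e.cur grows by len(src) per block so stored
// offsets stay meaningful (and stale ones fall out of maxMatchOffset range).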

// snappyL2 is the level 2 encoder: like level 1, but it keeps the previous
// block and a persistent hash table so matches can span block boundaries.
type snappyL2 struct {
	snappyGen
	table [tableSize]tableEntry
}

// Encode uses a similar algorithm to level 1, but is capable of matching
// across blocks, giving better compression at a small slowdown.
func (e *snappyL2) Encode(dst *tokens, src []byte) {
	const (
		inputMargin            = 16 - 1
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)

	// Ensure that e.cur doesn't wrap, mainly an issue on 32 bits.
	if e.cur > 1<<30 {
		for i := range e.table {
			e.table[i] = tableEntry{}
		}
		e.cur = maxStoreBlockSize
	}

	// This check isn't in the Snappy implementation, but there, the caller
	// instead of the callee handles this case.
	if len(src) < minNonLiteralBlockSize {
		// We do not fill the token table.
		// This will be picked up by caller.
		dst.n = uint16(len(src))
		e.cur += maxStoreBlockSize
		e.prev = e.prev[:0]
		return
	}

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := int32(len(src) - inputMargin)

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := int32(0)
	s := int32(0)
	cv := load3232(src, s)
	nextHash := hash(cv)

	for {
		// Copied from the C++ snappy implementation:
		//
		// Heuristic match skipping: If 32 bytes are scanned with no matches
		// found, start looking only at every other byte. If 32 more bytes are
		// scanned (or skipped), look at every third byte, etc.. When a match
		// is found, immediately go back to looking at every byte. This is a
		// small loss (~5% performance, ~0.1% density) for compressible data
		// due to more bookkeeping, but for non-compressible data (such as
		// JPEG) it's a huge win since the compressor quickly "realizes" the
		// data is incompressible and doesn't bother looking for matches
		// everywhere.
		//
		// The "skip" variable keeps track of how many bytes there are since
		// the last match; dividing it by 32 (ie. right-shifting by five) gives
		// the number of bytes to move ahead for each iteration.
		skip := int32(32)

		nextS := s
		var candidate tableEntry
		for {
			s = nextS
			bytesBetweenHashLookups := skip >> 5
			nextS = s + bytesBetweenHashLookups
			skip += bytesBetweenHashLookups
			if nextS > sLimit {
				goto emitRemainder
			}
			candidate = e.table[nextHash&tableMask]
			now := load3232(src, nextS)
			e.table[nextHash&tableMask] = tableEntry{offset: s + e.cur, val: cv}
			nextHash = hash(now)

			offset := s - (candidate.offset - e.cur)
			if offset >= maxMatchOffset || cv != candidate.val {
				// Out of range or not matched.
				cv = now
				continue
			}
			break
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.
		emitLiteral(dst, src[nextEmit:s])

		// Call emitCopy, and then see if another emitCopy could be our next
		// move. Repeat until we find no match for the input immediately after
		// what was consumed by the last emitCopy call.
		//
		// If we exit this loop normally then we need to call emitLiteral next,
		// though we don't yet know how big the literal will be. We handle that
		// by proceeding to the next iteration of the main loop. We also can
		// exit this loop via goto if we get close to exhausting the input.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.

			// Extend the 4-byte match as long as possible.
			s += 4
			t := candidate.offset - e.cur + 4
			l := e.matchlen(s, t, src)

			// matchToken is flate's equivalent of Snappy's emitCopy. (length,offset)
			dst.tokens[dst.n] = matchToken(uint32(l+4-baseMatchLength), uint32(s-t-baseMatchOffset))
			dst.n++
			s += l
			nextEmit = s
			if s >= sLimit {
				goto emitRemainder
			}

			// We could immediately start working at s now, but to improve
			// compression we first update the hash table at s-1 and at s. If
			// another emitCopy is not our next move, also calculate nextHash
			// at s+1. At least on GOARCH=amd64, these three hash calculations
			// are faster as one load64 call (with some shifts) instead of
			// three load32 calls.
			x := load6432(src, s-1)
			prevHash := hash(uint32(x))
			e.table[prevHash&tableMask] = tableEntry{offset: e.cur + s - 1, val: uint32(x)}
			x >>= 8
			currHash := hash(uint32(x))
			candidate = e.table[currHash&tableMask]
			e.table[currHash&tableMask] = tableEntry{offset: e.cur + s, val: uint32(x)}

			offset := s - (candidate.offset - e.cur)
			if offset >= maxMatchOffset || uint32(x) != candidate.val {
				cv = uint32(x >> 8)
				nextHash = hash(cv)
				s++
				break
			}
		}
	}

emitRemainder:
	if int(nextEmit) < len(src) {
		emitLiteral(dst, src[nextEmit:])
	}
	e.cur += int32(len(src))
	e.prev = e.prev[:len(src)]
	copy(e.prev, src)
}
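
// Note on offsets: e.table stores positions as s+e.cur, i.e. positions in the
// whole stream seen so far. Subtracting e.cur turns a stored offset back into
// a position relative to the current src; the result is negative when the
// candidate lies in the previous block, which matchlen handles by indexing
// into e.prev. Because e.cur advances by len(src) after every block, entries
// farther back than maxMatchOffset simply fail the offset check and are never
// followed.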

type tableEntryPrev struct {
	Cur  tableEntry
	Prev tableEntry
}

// snappyL3 is the level 3 encoder. It keeps two candidates per hash bucket
// (the current and the previous entry), so a hash collision does not
// immediately discard an older, still-reachable match.
type snappyL3 struct {
	snappyGen
	table [tableSize]tableEntryPrev
}

// Encode uses a similar algorithm to level 2, but will check up to two
// candidates per hash bucket.
func (e *snappyL3) Encode(dst *tokens, src []byte) {
	const (
		inputMargin            = 16 - 1
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)

	// Ensure that e.cur doesn't wrap, mainly an issue on 32 bits.
	if e.cur > 1<<30 {
		for i := range e.table {
			e.table[i] = tableEntryPrev{}
		}
		e.cur = maxStoreBlockSize
	}

	// This check isn't in the Snappy implementation, but there, the caller
	// instead of the callee handles this case.
	if len(src) < minNonLiteralBlockSize {
		// We do not fill the token table.
		// This will be picked up by caller.
		dst.n = uint16(len(src))
		e.cur += maxStoreBlockSize
		e.prev = e.prev[:0]
		return
	}

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := int32(len(src) - inputMargin)

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := int32(0)
	s := int32(0)
	cv := load3232(src, s)
	nextHash := hash(cv)

	for {
		// Copied from the C++ snappy implementation:
		//
		// Heuristic match skipping: If 32 bytes are scanned with no matches
		// found, start looking only at every other byte. If 32 more bytes are
		// scanned (or skipped), look at every third byte, etc.. When a match
		// is found, immediately go back to looking at every byte. This is a
		// small loss (~5% performance, ~0.1% density) for compressible data
		// due to more bookkeeping, but for non-compressible data (such as
		// JPEG) it's a huge win since the compressor quickly "realizes" the
		// data is incompressible and doesn't bother looking for matches
		// everywhere.
		//
		// The "skip" variable keeps track of how many bytes there are since
		// the last match; dividing it by 32 (ie. right-shifting by five) gives
		// the number of bytes to move ahead for each iteration.
		skip := int32(32)

		nextS := s
		var candidate tableEntry
		for {
			s = nextS
			bytesBetweenHashLookups := skip >> 5
			nextS = s + bytesBetweenHashLookups
			skip += bytesBetweenHashLookups
			if nextS > sLimit {
				goto emitRemainder
			}
			candidates := e.table[nextHash&tableMask]
			now := load3232(src, nextS)
			e.table[nextHash&tableMask] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur, val: cv}}
			nextHash = hash(now)

			// Check both candidates
			candidate = candidates.Cur
			if cv == candidate.val {
				offset := s - (candidate.offset - e.cur)
				if offset < maxMatchOffset {
					break
				}
			} else {
				// We only check if value mismatches.
				// Offset will always be invalid in other cases.
				candidate = candidates.Prev
				if cv == candidate.val {
					offset := s - (candidate.offset - e.cur)
					if offset < maxMatchOffset {
						break
					}
				}
			}
			cv = now
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.
		emitLiteral(dst, src[nextEmit:s])

		// Call emitCopy, and then see if another emitCopy could be our next
		// move. Repeat until we find no match for the input immediately after
		// what was consumed by the last emitCopy call.
		//
		// If we exit this loop normally then we need to call emitLiteral next,
		// though we don't yet know how big the literal will be. We handle that
		// by proceeding to the next iteration of the main loop. We also can
		// exit this loop via goto if we get close to exhausting the input.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.

			// Extend the 4-byte match as long as possible.
			s += 4
			t := candidate.offset - e.cur + 4
			l := e.matchlen(s, t, src)

			// matchToken is flate's equivalent of Snappy's emitCopy. (length,offset)
			dst.tokens[dst.n] = matchToken(uint32(l+4-baseMatchLength), uint32(s-t-baseMatchOffset))
			dst.n++
			s += l
			nextEmit = s
			if s >= sLimit {
				goto emitRemainder
			}

			// We could immediately start working at s now, but to improve
			// compression we first update the hash table at s-2, s-1 and at s.
			// If another emitCopy is not our next move, also calculate nextHash
			// at s+1. At least on GOARCH=amd64, these three hash calculations
			// are faster as one load64 call (with some shifts) instead of
			// three load32 calls.
			x := load6432(src, s-2)
			prevHash := hash(uint32(x))
			e.table[prevHash&tableMask] = tableEntryPrev{
				Prev: e.table[prevHash&tableMask].Cur,
				Cur:  tableEntry{offset: e.cur + s - 2, val: uint32(x)},
			}
			x >>= 8
			prevHash = hash(uint32(x))
			e.table[prevHash&tableMask] = tableEntryPrev{
				Prev: e.table[prevHash&tableMask].Cur,
				Cur:  tableEntry{offset: e.cur + s - 1, val: uint32(x)},
			}
			x >>= 8
			currHash := hash(uint32(x))
			candidates := e.table[currHash&tableMask]
			cv = uint32(x)
			e.table[currHash&tableMask] = tableEntryPrev{
				Prev: candidates.Cur,
				Cur:  tableEntry{offset: s + e.cur, val: cv},
			}

			// Check both candidates
			candidate = candidates.Cur
			if cv == candidate.val {
				offset := s - (candidate.offset - e.cur)
				if offset < maxMatchOffset {
					continue
				}
			} else {
				// We only check if value mismatches.
				// Offset will always be invalid in other cases.
				candidate = candidates.Prev
				if cv == candidate.val {
					offset := s - (candidate.offset - e.cur)
					if offset < maxMatchOffset {
						continue
					}
				}
			}
			cv = uint32(x >> 8)
			nextHash = hash(cv)
			s++
			break
		}
	}

emitRemainder:
	if int(nextEmit) < len(src) {
		emitLiteral(dst, src[nextEmit:])
	}
	e.cur += int32(len(src))
	e.prev = e.prev[:len(src)]
	copy(e.prev, src)
}
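
// Note on the two-candidate buckets used by levels 3 and 4: every insertion
// pushes the bucket's current entry into Prev, so each bucket behaves like a
// tiny two-way cache. Lookups try Cur first and only fall back to Prev on a
// value mismatch; if Cur's value matched but its offset was out of range, the
// older Prev entry can only be farther away, so checking it would be futile.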

// snappyL4 is the level 4 encoder. It reuses the level 3 state but spends a
// little more effort choosing between the two candidates.
type snappyL4 struct {
	snappyL3
}

// Encode uses a similar algorithm to level 3, but will also try the second
// candidate when the first match isn't long enough (shorter than matchLenGood).
func (e *snappyL4) Encode(dst *tokens, src []byte) {
	const (
		inputMargin            = 16 - 1
		minNonLiteralBlockSize = 1 + 1 + inputMargin
		matchLenGood           = 12
	)

	// Ensure that e.cur doesn't wrap, mainly an issue on 32 bits.
	if e.cur > 1<<30 {
		for i := range e.table {
			e.table[i] = tableEntryPrev{}
		}
		e.cur = maxStoreBlockSize
	}

	// This check isn't in the Snappy implementation, but there, the caller
	// instead of the callee handles this case.
	if len(src) < minNonLiteralBlockSize {
		// We do not fill the token table.
		// This will be picked up by caller.
		dst.n = uint16(len(src))
		e.cur += maxStoreBlockSize
		e.prev = e.prev[:0]
		return
	}

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := int32(len(src) - inputMargin)

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := int32(0)
	s := int32(0)
	cv := load3232(src, s)
	nextHash := hash(cv)

	for {
		// Copied from the C++ snappy implementation:
		//
		// Heuristic match skipping: If 32 bytes are scanned with no matches
		// found, start looking only at every other byte. If 32 more bytes are
		// scanned (or skipped), look at every third byte, etc.. When a match
		// is found, immediately go back to looking at every byte. This is a
		// small loss (~5% performance, ~0.1% density) for compressible data
		// due to more bookkeeping, but for non-compressible data (such as
		// JPEG) it's a huge win since the compressor quickly "realizes" the
		// data is incompressible and doesn't bother looking for matches
		// everywhere.
		//
		// The "skip" variable keeps track of how many bytes there are since
		// the last match; dividing it by 32 (ie. right-shifting by five) gives
		// the number of bytes to move ahead for each iteration.
		skip := int32(32)

		nextS := s
		var candidate tableEntry
		var candidateAlt tableEntry
		for {
			s = nextS
			bytesBetweenHashLookups := skip >> 5
			nextS = s + bytesBetweenHashLookups
			skip += bytesBetweenHashLookups
			if nextS > sLimit {
				goto emitRemainder
			}
			candidates := e.table[nextHash&tableMask]
			now := load3232(src, nextS)
			e.table[nextHash&tableMask] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur, val: cv}}
			nextHash = hash(now)

			// Check both candidates
			candidate = candidates.Cur
			if cv == candidate.val {
				offset := s - (candidate.offset - e.cur)
				if offset < maxMatchOffset {
					offset = s - (candidates.Prev.offset - e.cur)
					if cv == candidates.Prev.val && offset < maxMatchOffset {
						candidateAlt = candidates.Prev
					}
					break
				}
			} else {
				// We only check if value mismatches.
				// Offset will always be invalid in other cases.
				candidate = candidates.Prev
				if cv == candidate.val {
					offset := s - (candidate.offset - e.cur)
					if offset < maxMatchOffset {
						break
					}
				}
			}
			cv = now
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.
		emitLiteral(dst, src[nextEmit:s])

		// Call emitCopy, and then see if another emitCopy could be our next
		// move. Repeat until we find no match for the input immediately after
		// what was consumed by the last emitCopy call.
		//
		// If we exit this loop normally then we need to call emitLiteral next,
		// though we don't yet know how big the literal will be. We handle that
		// by proceeding to the next iteration of the main loop. We also can
		// exit this loop via goto if we get close to exhausting the input.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.

			// Extend the 4-byte match as long as possible.
			s += 4
			t := candidate.offset - e.cur + 4
			l := e.matchlen(s, t, src)

			// Try alternative candidate if match length < matchLenGood.
			if l < matchLenGood-4 && candidateAlt.offset != 0 {
				t2 := candidateAlt.offset - e.cur + 4
				l2 := e.matchlen(s, t2, src)
				if l2 > l {
					l = l2
					t = t2
				}
			}

			// matchToken is flate's equivalent of Snappy's emitCopy. (length,offset)
			dst.tokens[dst.n] = matchToken(uint32(l+4-baseMatchLength), uint32(s-t-baseMatchOffset))
			dst.n++
			s += l
			nextEmit = s
			if s >= sLimit {
				goto emitRemainder
			}

			// We could immediately start working at s now, but to improve
			// compression we first update the hash table at s-2, s-1 and at s.
			// If another emitCopy is not our next move, also calculate nextHash
			// at s+1. At least on GOARCH=amd64, these three hash calculations
			// are faster as one load64 call (with some shifts) instead of
			// three load32 calls.
			x := load6432(src, s-2)
			prevHash := hash(uint32(x))
			e.table[prevHash&tableMask] = tableEntryPrev{
				Prev: e.table[prevHash&tableMask].Cur,
				Cur:  tableEntry{offset: e.cur + s - 2, val: uint32(x)},
			}
			x >>= 8
			prevHash = hash(uint32(x))
			e.table[prevHash&tableMask] = tableEntryPrev{
				Prev: e.table[prevHash&tableMask].Cur,
				Cur:  tableEntry{offset: e.cur + s - 1, val: uint32(x)},
			}
			x >>= 8
			currHash := hash(uint32(x))
			candidates := e.table[currHash&tableMask]
			cv = uint32(x)
			e.table[currHash&tableMask] = tableEntryPrev{
				Prev: candidates.Cur,
				Cur:  tableEntry{offset: s + e.cur, val: cv},
			}

			// Check both candidates
			candidate = candidates.Cur
			candidateAlt = tableEntry{}
			if cv == candidate.val {
				offset := s - (candidate.offset - e.cur)
				if offset < maxMatchOffset {
					offset = s - (candidates.Prev.offset - e.cur)
					if cv == candidates.Prev.val && offset < maxMatchOffset {
						candidateAlt = candidates.Prev
					}
					continue
				}
			} else {
				// We only check if value mismatches.
				// Offset will always be invalid in other cases.
				candidate = candidates.Prev
				if cv == candidate.val {
					offset := s - (candidate.offset - e.cur)
					if offset < maxMatchOffset {
						continue
					}
				}
			}
			cv = uint32(x >> 8)
			nextHash = hash(cv)
			s++
			break
		}
	}

emitRemainder:
	if int(nextEmit) < len(src) {
		emitLiteral(dst, src[nextEmit:])
	}
	e.cur += int32(len(src))
	e.prev = e.prev[:len(src)]
	copy(e.prev, src)
}
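
// Note on matchLenGood: matchlen is called with s already advanced past the
// first 4 matched bytes, so l counts only the extension. The test
// l < matchLenGood-4 therefore means "the whole match is shorter than 12
// bytes", and only then is the cost of a second matchlen call paid for the
// alternative candidate.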

// matchlen returns the number of bytes that match between src[s:] and the
// data starting at position t. A negative t refers to a position inside the
// previous block, e.prev. The result is capped so that the total match (the
// 4 bytes already found plus the extension) never exceeds maxMatchLength.
func (e *snappyGen) matchlen(s, t int32, src []byte) int32 {
	s1 := int(s) + maxMatchLength - 4
	if s1 > len(src) {
		s1 = len(src)
	}

	// If we are inside the current block
	if t >= 0 {
		b := src[t:]
		a := src[s:s1]
		b = b[:len(a)]
		// Extend the match to be as long as possible.
		for i := range a {
			if a[i] != b[i] {
				return int32(i)
			}
		}
		return int32(len(a))
	}

	// We found a match in the previous block.
	tp := int32(len(e.prev)) + t
	if tp < 0 {
		return 0
	}

	// Extend the match to be as long as possible.
	a := src[s:s1]
	b := e.prev[tp:]
	if len(b) > len(a) {
		b = b[:len(a)]
	}
	a = a[:len(b)]
	for i := range b {
		if a[i] != b[i] {
			return int32(i)
		}
	}

	// The match may continue past the end of the previous block into the
	// start of the current one, since the two are contiguous in the stream.
	n := int32(len(b))
	a = src[s+n : s1]
	b = src[:len(a)]
	for i := range a {
		if a[i] != b[i] {
			return int32(i) + n
		}
	}
	return int32(len(a)) + n
}
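
// Worked example (illustrative, not from the original file): with a
// 60000-byte previous block, a candidate stored near its end can surface as
// t = -5 in the next block. matchlen then compares src[s:] against
// e.prev[59995:] and, if all five remaining bytes of e.prev match, keeps
// going against src[0:], so a single match can straddle the block boundary.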

// Reset the encoding state. The table itself is not cleared: advancing cur by
// maxMatchOffset+1 puts every previously stored entry outside the valid
// offset range, and the copy of the previous block is discarded.
func (e *snappyGen) Reset() {
	e.prev = e.prev[:0]
	e.cur += maxMatchOffset + 1
}