本站源代码
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1748 lines
44KB

  1. package roaring
  2. //
  3. // Copyright (c) 2016 by the roaring authors.
  4. // Licensed under the Apache License, Version 2.0.
  5. //
  6. // We derive a few lines of code from the sort.Search
  7. // function in the golang standard library. That function
  8. // is Copyright 2009 The Go Authors, and licensed
  9. // under the following BSD-style license.
  10. /*
  11. Copyright (c) 2009 The Go Authors. All rights reserved.
  12. Redistribution and use in source and binary forms, with or without
  13. modification, are permitted provided that the following conditions are
  14. met:
  15. * Redistributions of source code must retain the above copyright
  16. notice, this list of conditions and the following disclaimer.
  17. * Redistributions in binary form must reproduce the above
  18. copyright notice, this list of conditions and the following disclaimer
  19. in the documentation and/or other materials provided with the
  20. distribution.
  21. * Neither the name of Google Inc. nor the names of its
  22. contributors may be used to endorse or promote products derived from
  23. this software without specific prior written permission.
  24. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  25. "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  26. LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  27. A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  28. OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  29. SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  30. LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  31. DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  32. THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  33. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  34. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  35. */
  36. import (
  37. "fmt"
  38. "sort"
  39. "unsafe"
  40. )
  41. //go:generate msgp -unexported
  42. // runContainer16 does run-length encoding of sets of
  43. // uint16 integers.
  44. type runContainer16 struct {
  45. iv []interval16
  46. card int64
  47. // avoid allocation during search
  48. myOpts searchOptions `msg:"-"`
  49. }
  // interval16 is one run of a runContainer16. It describes the
  // closed interval [start, start+length].
  type interval16 struct {
  	// start is the first value of the run.
  	start uint16
  	// length is the number of values in the run minus one, that is
  	// last - start.
  	length uint16
  }
  57. func newInterval16Range(start, last uint16) interval16 {
  58. if last < start {
  59. panic(fmt.Sprintf("last (%d) cannot be smaller than start (%d)", last, start))
  60. }
  61. return interval16{
  62. start,
  63. last - start,
  64. }
  65. }
  66. // runlen returns the count of integers in the interval.
  67. func (iv interval16) runlen() int64 {
  68. return int64(iv.length) + 1
  69. }
  70. func (iv interval16) last() uint16 {
  71. return iv.start + iv.length
  72. }
  73. // String produces a human viewable string of the contents.
  74. func (iv interval16) String() string {
  75. return fmt.Sprintf("[%d, %d]", iv.start, iv.length)
  76. }
  77. func ivalString16(iv []interval16) string {
  78. var s string
  79. var j int
  80. var p interval16
  81. for j, p = range iv {
  82. s += fmt.Sprintf("%v:[%d, %d], ", j, p.start, p.last())
  83. }
  84. return s
  85. }
  86. // String produces a human viewable string of the contents.
  87. func (rc *runContainer16) String() string {
  88. if len(rc.iv) == 0 {
  89. return "runContainer16{}"
  90. }
  91. is := ivalString16(rc.iv)
  92. return `runContainer16{` + is + `}`
  93. }
  // uint16Slice adapts a []uint16 to sort.Interface so that it can be
  // handed to sort.Sort and ordered ascending.
  type uint16Slice []uint16

  // Len returns the number of elements in p.
  func (p uint16Slice) Len() int {
  	return len(p)
  }

  // Less reports whether the element at i is smaller than the one at j.
  func (p uint16Slice) Less(i, j int) bool {
  	return p[j] > p[i]
  }

  // Swap exchanges the elements at i and j.
  func (p uint16Slice) Swap(i, j int) {
  	p[j], p[i] = p[i], p[j]
  }
  102. //msgp:ignore addHelper
  103. // addHelper helps build a runContainer16.
  104. type addHelper16 struct {
  105. runstart uint16
  106. runlen uint16
  107. actuallyAdded uint16
  108. m []interval16
  109. rc *runContainer16
  110. }
  111. func (ah *addHelper16) storeIval(runstart, runlen uint16) {
  112. mi := interval16{start: runstart, length: runlen}
  113. ah.m = append(ah.m, mi)
  114. }
  115. func (ah *addHelper16) add(cur, prev uint16, i int) {
  116. if cur == prev+1 {
  117. ah.runlen++
  118. ah.actuallyAdded++
  119. } else {
  120. if cur < prev {
  121. panic(fmt.Sprintf("newRunContainer16FromVals sees "+
  122. "unsorted vals; vals[%v]=cur=%v < prev=%v. Sort your vals"+
  123. " before calling us with alreadySorted == true.", i, cur, prev))
  124. }
  125. if cur == prev {
  126. // ignore duplicates
  127. } else {
  128. ah.actuallyAdded++
  129. ah.storeIval(ah.runstart, ah.runlen)
  130. ah.runstart = cur
  131. ah.runlen = 0
  132. }
  133. }
  134. }
  135. // newRunContainerRange makes a new container made of just the specified closed interval [rangestart,rangelast]
  136. func newRunContainer16Range(rangestart uint16, rangelast uint16) *runContainer16 {
  137. rc := &runContainer16{}
  138. rc.iv = append(rc.iv, newInterval16Range(rangestart, rangelast))
  139. return rc
  140. }
  141. // newRunContainer16FromVals makes a new container from vals.
  142. //
  143. // For efficiency, vals should be sorted in ascending order.
  144. // Ideally vals should not contain duplicates, but we detect and
  145. // ignore them. If vals is already sorted in ascending order, then
  146. // pass alreadySorted = true. Otherwise, for !alreadySorted,
  147. // we will sort vals before creating a runContainer16 of them.
  148. // We sort the original vals, so this will change what the
  149. // caller sees in vals as a side effect.
  150. func newRunContainer16FromVals(alreadySorted bool, vals ...uint16) *runContainer16 {
  151. // keep this in sync with newRunContainer16FromArray below
  152. rc := &runContainer16{}
  153. ah := addHelper16{rc: rc}
  154. if !alreadySorted {
  155. sort.Sort(uint16Slice(vals))
  156. }
  157. n := len(vals)
  158. var cur, prev uint16
  159. switch {
  160. case n == 0:
  161. // nothing more
  162. case n == 1:
  163. ah.m = append(ah.m, newInterval16Range(vals[0], vals[0]))
  164. ah.actuallyAdded++
  165. default:
  166. ah.runstart = vals[0]
  167. ah.actuallyAdded++
  168. for i := 1; i < n; i++ {
  169. prev = vals[i-1]
  170. cur = vals[i]
  171. ah.add(cur, prev, i)
  172. }
  173. ah.storeIval(ah.runstart, ah.runlen)
  174. }
  175. rc.iv = ah.m
  176. rc.card = int64(ah.actuallyAdded)
  177. return rc
  178. }
  179. // newRunContainer16FromBitmapContainer makes a new run container from bc,
  180. // somewhat efficiently. For reference, see the Java
  181. // https://github.com/RoaringBitmap/RoaringBitmap/blob/master/src/main/java/org/roaringbitmap/RunContainer.java#L145-L192
  182. func newRunContainer16FromBitmapContainer(bc *bitmapContainer) *runContainer16 {
  183. rc := &runContainer16{}
  184. nbrRuns := bc.numberOfRuns()
  185. if nbrRuns == 0 {
  186. return rc
  187. }
  188. rc.iv = make([]interval16, nbrRuns)
  189. longCtr := 0 // index of current long in bitmap
  190. curWord := bc.bitmap[0] // its value
  191. runCount := 0
  192. for {
  193. // potentially multiword advance to first 1 bit
  194. for curWord == 0 && longCtr < len(bc.bitmap)-1 {
  195. longCtr++
  196. curWord = bc.bitmap[longCtr]
  197. }
  198. if curWord == 0 {
  199. // wrap up, no more runs
  200. return rc
  201. }
  202. localRunStart := countTrailingZeros(curWord)
  203. runStart := localRunStart + 64*longCtr
  204. // stuff 1s into number's LSBs
  205. curWordWith1s := curWord | (curWord - 1)
  206. // find the next 0, potentially in a later word
  207. runEnd := 0
  208. for curWordWith1s == maxWord && longCtr < len(bc.bitmap)-1 {
  209. longCtr++
  210. curWordWith1s = bc.bitmap[longCtr]
  211. }
  212. if curWordWith1s == maxWord {
  213. // a final unterminated run of 1s
  214. runEnd = wordSizeInBits + longCtr*64
  215. rc.iv[runCount].start = uint16(runStart)
  216. rc.iv[runCount].length = uint16(runEnd) - uint16(runStart) - 1
  217. return rc
  218. }
  219. localRunEnd := countTrailingZeros(^curWordWith1s)
  220. runEnd = localRunEnd + longCtr*64
  221. rc.iv[runCount].start = uint16(runStart)
  222. rc.iv[runCount].length = uint16(runEnd) - 1 - uint16(runStart)
  223. runCount++
  224. // now, zero out everything right of runEnd.
  225. curWord = curWordWith1s & (curWordWith1s + 1)
  226. // We've lathered and rinsed, so repeat...
  227. }
  228. }
  229. //
  230. // newRunContainer16FromArray populates a new
  231. // runContainer16 from the contents of arr.
  232. //
  233. func newRunContainer16FromArray(arr *arrayContainer) *runContainer16 {
  234. // keep this in sync with newRunContainer16FromVals above
  235. rc := &runContainer16{}
  236. ah := addHelper16{rc: rc}
  237. n := arr.getCardinality()
  238. var cur, prev uint16
  239. switch {
  240. case n == 0:
  241. // nothing more
  242. case n == 1:
  243. ah.m = append(ah.m, newInterval16Range(arr.content[0], arr.content[0]))
  244. ah.actuallyAdded++
  245. default:
  246. ah.runstart = arr.content[0]
  247. ah.actuallyAdded++
  248. for i := 1; i < n; i++ {
  249. prev = arr.content[i-1]
  250. cur = arr.content[i]
  251. ah.add(cur, prev, i)
  252. }
  253. ah.storeIval(ah.runstart, ah.runlen)
  254. }
  255. rc.iv = ah.m
  256. rc.card = int64(ah.actuallyAdded)
  257. return rc
  258. }
  259. // set adds the integers in vals to the set. Vals
  260. // must be sorted in increasing order; if not, you should set
  261. // alreadySorted to false, and we will sort them in place for you.
  262. // (Be aware of this side effect -- it will affect the callers
  263. // view of vals).
  264. //
  265. // If you have a small number of additions to an already
  266. // big runContainer16, calling Add() may be faster.
  267. func (rc *runContainer16) set(alreadySorted bool, vals ...uint16) {
  268. rc2 := newRunContainer16FromVals(alreadySorted, vals...)
  269. un := rc.union(rc2)
  270. rc.iv = un.iv
  271. rc.card = 0
  272. }
  273. // canMerge returns true iff the intervals
  274. // a and b either overlap or they are
  275. // contiguous and so can be merged into
  276. // a single interval.
  277. func canMerge16(a, b interval16) bool {
  278. if int64(a.last())+1 < int64(b.start) {
  279. return false
  280. }
  281. return int64(b.last())+1 >= int64(a.start)
  282. }
  283. // haveOverlap differs from canMerge in that
  284. // it tells you if the intersection of a
  285. // and b would contain an element (otherwise
  286. // it would be the empty set, and we return
  287. // false).
  288. func haveOverlap16(a, b interval16) bool {
  289. if int64(a.last())+1 <= int64(b.start) {
  290. return false
  291. }
  292. return int64(b.last())+1 > int64(a.start)
  293. }
  294. // mergeInterval16s joins a and b into a
  295. // new interval, and panics if it cannot.
  296. func mergeInterval16s(a, b interval16) (res interval16) {
  297. if !canMerge16(a, b) {
  298. panic(fmt.Sprintf("cannot merge %#v and %#v", a, b))
  299. }
  300. if b.start < a.start {
  301. res.start = b.start
  302. } else {
  303. res.start = a.start
  304. }
  305. if b.last() > a.last() {
  306. res.length = b.last() - res.start
  307. } else {
  308. res.length = a.last() - res.start
  309. }
  310. return
  311. }
  312. // intersectInterval16s returns the intersection
  313. // of a and b. The isEmpty flag will be true if
  314. // a and b were disjoint.
  315. func intersectInterval16s(a, b interval16) (res interval16, isEmpty bool) {
  316. if !haveOverlap16(a, b) {
  317. isEmpty = true
  318. return
  319. }
  320. if b.start > a.start {
  321. res.start = b.start
  322. } else {
  323. res.start = a.start
  324. }
  325. bEnd := b.last()
  326. aEnd := a.last()
  327. var resEnd uint16
  328. if bEnd < aEnd {
  329. resEnd = bEnd
  330. } else {
  331. resEnd = aEnd
  332. }
  333. res.length = resEnd - res.start
  334. return
  335. }
  336. // union merges two runContainer16s, producing
  337. // a new runContainer16 with the union of rc and b.
  338. func (rc *runContainer16) union(b *runContainer16) *runContainer16 {
  339. // rc is also known as 'a' here, but golint insisted we
  340. // call it rc for consistency with the rest of the methods.
  341. var m []interval16
  342. alim := int64(len(rc.iv))
  343. blim := int64(len(b.iv))
  344. var na int64 // next from a
  345. var nb int64 // next from b
  346. // merged holds the current merge output, which might
  347. // get additional merges before being appended to m.
  348. var merged interval16
  349. var mergedUsed bool // is merged being used at the moment?
  350. var cura interval16 // currently considering this interval16 from a
  351. var curb interval16 // currently considering this interval16 from b
  352. pass := 0
  353. for na < alim && nb < blim {
  354. pass++
  355. cura = rc.iv[na]
  356. curb = b.iv[nb]
  357. if mergedUsed {
  358. mergedUpdated := false
  359. if canMerge16(cura, merged) {
  360. merged = mergeInterval16s(cura, merged)
  361. na = rc.indexOfIntervalAtOrAfter(int64(merged.last())+1, na+1)
  362. mergedUpdated = true
  363. }
  364. if canMerge16(curb, merged) {
  365. merged = mergeInterval16s(curb, merged)
  366. nb = b.indexOfIntervalAtOrAfter(int64(merged.last())+1, nb+1)
  367. mergedUpdated = true
  368. }
  369. if !mergedUpdated {
  370. // we know that merged is disjoint from cura and curb
  371. m = append(m, merged)
  372. mergedUsed = false
  373. }
  374. continue
  375. } else {
  376. // !mergedUsed
  377. if !canMerge16(cura, curb) {
  378. if cura.start < curb.start {
  379. m = append(m, cura)
  380. na++
  381. } else {
  382. m = append(m, curb)
  383. nb++
  384. }
  385. } else {
  386. merged = mergeInterval16s(cura, curb)
  387. mergedUsed = true
  388. na = rc.indexOfIntervalAtOrAfter(int64(merged.last())+1, na+1)
  389. nb = b.indexOfIntervalAtOrAfter(int64(merged.last())+1, nb+1)
  390. }
  391. }
  392. }
  393. var aDone, bDone bool
  394. if na >= alim {
  395. aDone = true
  396. }
  397. if nb >= blim {
  398. bDone = true
  399. }
  400. // finish by merging anything remaining into merged we can:
  401. if mergedUsed {
  402. if !aDone {
  403. aAdds:
  404. for na < alim {
  405. cura = rc.iv[na]
  406. if canMerge16(cura, merged) {
  407. merged = mergeInterval16s(cura, merged)
  408. na = rc.indexOfIntervalAtOrAfter(int64(merged.last())+1, na+1)
  409. } else {
  410. break aAdds
  411. }
  412. }
  413. }
  414. if !bDone {
  415. bAdds:
  416. for nb < blim {
  417. curb = b.iv[nb]
  418. if canMerge16(curb, merged) {
  419. merged = mergeInterval16s(curb, merged)
  420. nb = b.indexOfIntervalAtOrAfter(int64(merged.last())+1, nb+1)
  421. } else {
  422. break bAdds
  423. }
  424. }
  425. }
  426. m = append(m, merged)
  427. }
  428. if na < alim {
  429. m = append(m, rc.iv[na:]...)
  430. }
  431. if nb < blim {
  432. m = append(m, b.iv[nb:]...)
  433. }
  434. res := &runContainer16{iv: m}
  435. return res
  436. }
  437. // unionCardinality returns the cardinality of the merger of two runContainer16s, the union of rc and b.
  438. func (rc *runContainer16) unionCardinality(b *runContainer16) uint64 {
  439. // rc is also known as 'a' here, but golint insisted we
  440. // call it rc for consistency with the rest of the methods.
  441. answer := uint64(0)
  442. alim := int64(len(rc.iv))
  443. blim := int64(len(b.iv))
  444. var na int64 // next from a
  445. var nb int64 // next from b
  446. // merged holds the current merge output, which might
  447. // get additional merges before being appended to m.
  448. var merged interval16
  449. var mergedUsed bool // is merged being used at the moment?
  450. var cura interval16 // currently considering this interval16 from a
  451. var curb interval16 // currently considering this interval16 from b
  452. pass := 0
  453. for na < alim && nb < blim {
  454. pass++
  455. cura = rc.iv[na]
  456. curb = b.iv[nb]
  457. if mergedUsed {
  458. mergedUpdated := false
  459. if canMerge16(cura, merged) {
  460. merged = mergeInterval16s(cura, merged)
  461. na = rc.indexOfIntervalAtOrAfter(int64(merged.last())+1, na+1)
  462. mergedUpdated = true
  463. }
  464. if canMerge16(curb, merged) {
  465. merged = mergeInterval16s(curb, merged)
  466. nb = b.indexOfIntervalAtOrAfter(int64(merged.last())+1, nb+1)
  467. mergedUpdated = true
  468. }
  469. if !mergedUpdated {
  470. // we know that merged is disjoint from cura and curb
  471. //m = append(m, merged)
  472. answer += uint64(merged.last()) - uint64(merged.start) + 1
  473. mergedUsed = false
  474. }
  475. continue
  476. } else {
  477. // !mergedUsed
  478. if !canMerge16(cura, curb) {
  479. if cura.start < curb.start {
  480. answer += uint64(cura.last()) - uint64(cura.start) + 1
  481. //m = append(m, cura)
  482. na++
  483. } else {
  484. answer += uint64(curb.last()) - uint64(curb.start) + 1
  485. //m = append(m, curb)
  486. nb++
  487. }
  488. } else {
  489. merged = mergeInterval16s(cura, curb)
  490. mergedUsed = true
  491. na = rc.indexOfIntervalAtOrAfter(int64(merged.last())+1, na+1)
  492. nb = b.indexOfIntervalAtOrAfter(int64(merged.last())+1, nb+1)
  493. }
  494. }
  495. }
  496. var aDone, bDone bool
  497. if na >= alim {
  498. aDone = true
  499. }
  500. if nb >= blim {
  501. bDone = true
  502. }
  503. // finish by merging anything remaining into merged we can:
  504. if mergedUsed {
  505. if !aDone {
  506. aAdds:
  507. for na < alim {
  508. cura = rc.iv[na]
  509. if canMerge16(cura, merged) {
  510. merged = mergeInterval16s(cura, merged)
  511. na = rc.indexOfIntervalAtOrAfter(int64(merged.last())+1, na+1)
  512. } else {
  513. break aAdds
  514. }
  515. }
  516. }
  517. if !bDone {
  518. bAdds:
  519. for nb < blim {
  520. curb = b.iv[nb]
  521. if canMerge16(curb, merged) {
  522. merged = mergeInterval16s(curb, merged)
  523. nb = b.indexOfIntervalAtOrAfter(int64(merged.last())+1, nb+1)
  524. } else {
  525. break bAdds
  526. }
  527. }
  528. }
  529. //m = append(m, merged)
  530. answer += uint64(merged.last()) - uint64(merged.start) + 1
  531. }
  532. for _, r := range rc.iv[na:] {
  533. answer += uint64(r.last()) - uint64(r.start) + 1
  534. }
  535. for _, r := range b.iv[nb:] {
  536. answer += uint64(r.last()) - uint64(r.start) + 1
  537. }
  538. return answer
  539. }
  540. // indexOfIntervalAtOrAfter is a helper for union.
  541. func (rc *runContainer16) indexOfIntervalAtOrAfter(key int64, startIndex int64) int64 {
  542. rc.myOpts.startIndex = startIndex
  543. rc.myOpts.endxIndex = 0
  544. w, already, _ := rc.search(key, &rc.myOpts)
  545. if already {
  546. return w
  547. }
  548. return w + 1
  549. }
  550. // intersect returns a new runContainer16 holding the
  551. // intersection of rc (also known as 'a') and b.
  552. func (rc *runContainer16) intersect(b *runContainer16) *runContainer16 {
  553. a := rc
  554. numa := int64(len(a.iv))
  555. numb := int64(len(b.iv))
  556. res := &runContainer16{}
  557. if numa == 0 || numb == 0 {
  558. return res
  559. }
  560. if numa == 1 && numb == 1 {
  561. if !haveOverlap16(a.iv[0], b.iv[0]) {
  562. return res
  563. }
  564. }
  565. var output []interval16
  566. var acuri int64
  567. var bcuri int64
  568. astart := int64(a.iv[acuri].start)
  569. bstart := int64(b.iv[bcuri].start)
  570. var intersection interval16
  571. var leftoverstart int64
  572. var isOverlap, isLeftoverA, isLeftoverB bool
  573. var done bool
  574. toploop:
  575. for acuri < numa && bcuri < numb {
  576. isOverlap, isLeftoverA, isLeftoverB, leftoverstart, intersection =
  577. intersectWithLeftover16(astart, int64(a.iv[acuri].last()), bstart, int64(b.iv[bcuri].last()))
  578. if !isOverlap {
  579. switch {
  580. case astart < bstart:
  581. acuri, done = a.findNextIntervalThatIntersectsStartingFrom(acuri+1, bstart)
  582. if done {
  583. break toploop
  584. }
  585. astart = int64(a.iv[acuri].start)
  586. case astart > bstart:
  587. bcuri, done = b.findNextIntervalThatIntersectsStartingFrom(bcuri+1, astart)
  588. if done {
  589. break toploop
  590. }
  591. bstart = int64(b.iv[bcuri].start)
  592. //default:
  593. // panic("impossible that astart == bstart, since !isOverlap")
  594. }
  595. } else {
  596. // isOverlap
  597. output = append(output, intersection)
  598. switch {
  599. case isLeftoverA:
  600. // note that we change astart without advancing acuri,
  601. // since we need to capture any 2ndary intersections with a.iv[acuri]
  602. astart = leftoverstart
  603. bcuri++
  604. if bcuri >= numb {
  605. break toploop
  606. }
  607. bstart = int64(b.iv[bcuri].start)
  608. case isLeftoverB:
  609. // note that we change bstart without advancing bcuri,
  610. // since we need to capture any 2ndary intersections with b.iv[bcuri]
  611. bstart = leftoverstart
  612. acuri++
  613. if acuri >= numa {
  614. break toploop
  615. }
  616. astart = int64(a.iv[acuri].start)
  617. default:
  618. // neither had leftover, both completely consumed
  619. // optionally, assert for sanity:
  620. //if a.iv[acuri].endx != b.iv[bcuri].endx {
  621. // panic("huh? should only be possible that endx agree now!")
  622. //}
  623. // advance to next a interval
  624. acuri++
  625. if acuri >= numa {
  626. break toploop
  627. }
  628. astart = int64(a.iv[acuri].start)
  629. // advance to next b interval
  630. bcuri++
  631. if bcuri >= numb {
  632. break toploop
  633. }
  634. bstart = int64(b.iv[bcuri].start)
  635. }
  636. }
  637. } // end for toploop
  638. if len(output) == 0 {
  639. return res
  640. }
  641. res.iv = output
  642. return res
  643. }
  644. // intersectCardinality returns the cardinality of the
  645. // intersection of rc (also known as 'a') and b.
  646. func (rc *runContainer16) intersectCardinality(b *runContainer16) int64 {
  647. answer := int64(0)
  648. a := rc
  649. numa := int64(len(a.iv))
  650. numb := int64(len(b.iv))
  651. if numa == 0 || numb == 0 {
  652. return 0
  653. }
  654. if numa == 1 && numb == 1 {
  655. if !haveOverlap16(a.iv[0], b.iv[0]) {
  656. return 0
  657. }
  658. }
  659. var acuri int64
  660. var bcuri int64
  661. astart := int64(a.iv[acuri].start)
  662. bstart := int64(b.iv[bcuri].start)
  663. var intersection interval16
  664. var leftoverstart int64
  665. var isOverlap, isLeftoverA, isLeftoverB bool
  666. var done bool
  667. pass := 0
  668. toploop:
  669. for acuri < numa && bcuri < numb {
  670. pass++
  671. isOverlap, isLeftoverA, isLeftoverB, leftoverstart, intersection =
  672. intersectWithLeftover16(astart, int64(a.iv[acuri].last()), bstart, int64(b.iv[bcuri].last()))
  673. if !isOverlap {
  674. switch {
  675. case astart < bstart:
  676. acuri, done = a.findNextIntervalThatIntersectsStartingFrom(acuri+1, bstart)
  677. if done {
  678. break toploop
  679. }
  680. astart = int64(a.iv[acuri].start)
  681. case astart > bstart:
  682. bcuri, done = b.findNextIntervalThatIntersectsStartingFrom(bcuri+1, astart)
  683. if done {
  684. break toploop
  685. }
  686. bstart = int64(b.iv[bcuri].start)
  687. //default:
  688. // panic("impossible that astart == bstart, since !isOverlap")
  689. }
  690. } else {
  691. // isOverlap
  692. answer += int64(intersection.last()) - int64(intersection.start) + 1
  693. switch {
  694. case isLeftoverA:
  695. // note that we change astart without advancing acuri,
  696. // since we need to capture any 2ndary intersections with a.iv[acuri]
  697. astart = leftoverstart
  698. bcuri++
  699. if bcuri >= numb {
  700. break toploop
  701. }
  702. bstart = int64(b.iv[bcuri].start)
  703. case isLeftoverB:
  704. // note that we change bstart without advancing bcuri,
  705. // since we need to capture any 2ndary intersections with b.iv[bcuri]
  706. bstart = leftoverstart
  707. acuri++
  708. if acuri >= numa {
  709. break toploop
  710. }
  711. astart = int64(a.iv[acuri].start)
  712. default:
  713. // neither had leftover, both completely consumed
  714. // optionally, assert for sanity:
  715. //if a.iv[acuri].endx != b.iv[bcuri].endx {
  716. // panic("huh? should only be possible that endx agree now!")
  717. //}
  718. // advance to next a interval
  719. acuri++
  720. if acuri >= numa {
  721. break toploop
  722. }
  723. astart = int64(a.iv[acuri].start)
  724. // advance to next b interval
  725. bcuri++
  726. if bcuri >= numb {
  727. break toploop
  728. }
  729. bstart = int64(b.iv[bcuri].start)
  730. }
  731. }
  732. } // end for toploop
  733. return answer
  734. }
  735. // get returns true iff key is in the container.
  736. func (rc *runContainer16) contains(key uint16) bool {
  737. _, in, _ := rc.search(int64(key), nil)
  738. return in
  739. }
  740. // numIntervals returns the count of intervals in the container.
  741. func (rc *runContainer16) numIntervals() int {
  742. return len(rc.iv)
  743. }
  744. // search returns alreadyPresent to indicate if the
  745. // key is already in one of our interval16s.
  746. //
  747. // If key is alreadyPresent, then whichInterval16 tells
  748. // you where.
  749. //
  750. // If key is not already present, then whichInterval16 is
  751. // set as follows:
  752. //
  753. // a) whichInterval16 == len(rc.iv)-1 if key is beyond our
  754. // last interval16 in rc.iv;
  755. //
  756. // b) whichInterval16 == -1 if key is before our first
  757. // interval16 in rc.iv;
  758. //
  759. // c) whichInterval16 is set to the minimum index of rc.iv
  760. // which comes strictly before the key;
  761. // so rc.iv[whichInterval16].last < key,
  762. // and if whichInterval16+1 exists, then key < rc.iv[whichInterval16+1].start
  763. // (Note that whichInterval16+1 won't exist when
  764. // whichInterval16 is the last interval.)
  765. //
  766. // runContainer16.search always returns whichInterval16 < len(rc.iv).
  767. //
  768. // If not nil, opts can be used to further restrict
  769. // the search space.
  770. //
  771. func (rc *runContainer16) search(key int64, opts *searchOptions) (whichInterval16 int64, alreadyPresent bool, numCompares int) {
  772. n := int64(len(rc.iv))
  773. if n == 0 {
  774. return -1, false, 0
  775. }
  776. startIndex := int64(0)
  777. endxIndex := n
  778. if opts != nil {
  779. startIndex = opts.startIndex
  780. // let endxIndex == 0 mean no effect
  781. if opts.endxIndex > 0 {
  782. endxIndex = opts.endxIndex
  783. }
  784. }
  785. // sort.Search returns the smallest index i
  786. // in [0, n) at which f(i) is true, assuming that on the range [0, n),
  787. // f(i) == true implies f(i+1) == true.
  788. // If there is no such index, Search returns n.
  789. // For correctness, this began as verbatim snippet from
  790. // sort.Search in the Go standard lib.
  791. // We inline our comparison function for speed, and
  792. // annotate with numCompares
  793. // to observe and test that extra bounds are utilized.
  794. i, j := startIndex, endxIndex
  795. for i < j {
  796. h := i + (j-i)/2 // avoid overflow when computing h as the bisector
  797. // i <= h < j
  798. numCompares++
  799. if !(key < int64(rc.iv[h].start)) {
  800. i = h + 1
  801. } else {
  802. j = h
  803. }
  804. }
  805. below := i
  806. // end std lib snippet.
  807. // The above is a simple in-lining and annotation of:
  808. /* below := sort.Search(n,
  809. func(i int) bool {
  810. return key < rc.iv[i].start
  811. })
  812. */
  813. whichInterval16 = below - 1
  814. if below == n {
  815. // all falses => key is >= start of all interval16s
  816. // ... so does it belong to the last interval16?
  817. if key < int64(rc.iv[n-1].last())+1 {
  818. // yes, it belongs to the last interval16
  819. alreadyPresent = true
  820. return
  821. }
  822. // no, it is beyond the last interval16.
  823. // leave alreadyPreset = false
  824. return
  825. }
  826. // INVAR: key is below rc.iv[below]
  827. if below == 0 {
  828. // key is before the first first interval16.
  829. // leave alreadyPresent = false
  830. return
  831. }
  832. // INVAR: key is >= rc.iv[below-1].start and
  833. // key is < rc.iv[below].start
  834. // is key in below-1 interval16?
  835. if key >= int64(rc.iv[below-1].start) && key < int64(rc.iv[below-1].last())+1 {
  836. // yes, it is. key is in below-1 interval16.
  837. alreadyPresent = true
  838. return
  839. }
  840. // INVAR: key >= rc.iv[below-1].endx && key < rc.iv[below].start
  841. // leave alreadyPresent = false
  842. return
  843. }
  844. // cardinality returns the count of the integers stored in the
  845. // runContainer16.
  846. func (rc *runContainer16) cardinality() int64 {
  847. if len(rc.iv) == 0 {
  848. rc.card = 0
  849. return 0
  850. }
  851. if rc.card > 0 {
  852. return rc.card // already cached
  853. }
  854. // have to compute it
  855. var n int64
  856. for _, p := range rc.iv {
  857. n += p.runlen()
  858. }
  859. rc.card = n // cache it
  860. return n
  861. }
  862. // AsSlice decompresses the contents into a []uint16 slice.
  863. func (rc *runContainer16) AsSlice() []uint16 {
  864. s := make([]uint16, rc.cardinality())
  865. j := 0
  866. for _, p := range rc.iv {
  867. for i := p.start; i <= p.last(); i++ {
  868. s[j] = i
  869. j++
  870. }
  871. }
  872. return s
  873. }
  874. // newRunContainer16 creates an empty run container.
  875. func newRunContainer16() *runContainer16 {
  876. return &runContainer16{}
  877. }
  878. // newRunContainer16CopyIv creates a run container, initializing
  879. // with a copy of the supplied iv slice.
  880. //
  881. func newRunContainer16CopyIv(iv []interval16) *runContainer16 {
  882. rc := &runContainer16{
  883. iv: make([]interval16, len(iv)),
  884. }
  885. copy(rc.iv, iv)
  886. return rc
  887. }
  888. func (rc *runContainer16) Clone() *runContainer16 {
  889. rc2 := newRunContainer16CopyIv(rc.iv)
  890. return rc2
  891. }
  892. // newRunContainer16TakeOwnership returns a new runContainer16
  893. // backed by the provided iv slice, which we will
  894. // assume exclusive control over from now on.
  895. //
  896. func newRunContainer16TakeOwnership(iv []interval16) *runContainer16 {
  897. rc := &runContainer16{
  898. iv: iv,
  899. }
  900. return rc
  901. }
  902. const baseRc16Size = int(unsafe.Sizeof(runContainer16{}))
  903. const perIntervalRc16Size = int(unsafe.Sizeof(interval16{}))
  904. const baseDiskRc16Size = int(unsafe.Sizeof(uint16(0)))
  905. // see also runContainer16SerializedSizeInBytes(numRuns int) int
  906. // getSizeInBytes returns the number of bytes of memory
  907. // required by this runContainer16.
  908. func (rc *runContainer16) getSizeInBytes() int {
  909. return perIntervalRc16Size*len(rc.iv) + baseRc16Size
  910. }
  911. // runContainer16SerializedSizeInBytes returns the number of bytes of disk
  912. // required to hold numRuns in a runContainer16.
  913. func runContainer16SerializedSizeInBytes(numRuns int) int {
  914. return perIntervalRc16Size*numRuns + baseDiskRc16Size
  915. }
  916. // Add adds a single value k to the set.
  917. func (rc *runContainer16) Add(k uint16) (wasNew bool) {
  918. // TODO comment from runContainer16.java:
  919. // it might be better and simpler to do return
  920. // toBitmapOrArrayContainer(getCardinality()).add(k)
  921. // but note that some unit tests use this method to build up test
  922. // runcontainers without calling runOptimize
  923. k64 := int64(k)
  924. index, present, _ := rc.search(k64, nil)
  925. if present {
  926. return // already there
  927. }
  928. wasNew = true
  929. // increment card if it is cached already
  930. if rc.card > 0 {
  931. rc.card++
  932. }
  933. n := int64(len(rc.iv))
  934. if index == -1 {
  935. // we may need to extend the first run
  936. if n > 0 {
  937. if rc.iv[0].start == k+1 {
  938. rc.iv[0].start = k
  939. rc.iv[0].length++
  940. return
  941. }
  942. }
  943. // nope, k stands alone, starting the new first interval16.
  944. rc.iv = append([]interval16{newInterval16Range(k, k)}, rc.iv...)
  945. return
  946. }
  947. // are we off the end? handle both index == n and index == n-1:
  948. if index >= n-1 {
  949. if int64(rc.iv[n-1].last())+1 == k64 {
  950. rc.iv[n-1].length++
  951. return
  952. }
  953. rc.iv = append(rc.iv, newInterval16Range(k, k))
  954. return
  955. }
  956. // INVAR: index and index+1 both exist, and k goes between them.
  957. //
  958. // Now: add k into the middle,
  959. // possibly fusing with index or index+1 interval16
  960. // and possibly resulting in fusing of two interval16s
  961. // that had a one integer gap.
  962. left := index
  963. right := index + 1
  964. // are we fusing left and right by adding k?
  965. if int64(rc.iv[left].last())+1 == k64 && int64(rc.iv[right].start) == k64+1 {
  966. // fuse into left
  967. rc.iv[left].length = rc.iv[right].last() - rc.iv[left].start
  968. // remove redundant right
  969. rc.iv = append(rc.iv[:left+1], rc.iv[right+1:]...)
  970. return
  971. }
  972. // are we an addition to left?
  973. if int64(rc.iv[left].last())+1 == k64 {
  974. // yes
  975. rc.iv[left].length++
  976. return
  977. }
  978. // are we an addition to right?
  979. if int64(rc.iv[right].start) == k64+1 {
  980. // yes
  981. rc.iv[right].start = k
  982. rc.iv[right].length++
  983. return
  984. }
  985. // k makes a standalone new interval16, inserted in the middle
  986. tail := append([]interval16{newInterval16Range(k, k)}, rc.iv[right:]...)
  987. rc.iv = append(rc.iv[:left+1], tail...)
  988. return
  989. }
  990. //msgp:ignore runIterator
  991. // runIterator16 advice: you must call Next() at least once
  992. // before calling Cur(); and you should call HasNext()
  993. // before calling Next() to insure there are contents.
  994. type runIterator16 struct {
  995. rc *runContainer16
  996. curIndex int64
  997. curPosInIndex uint16
  998. curSeq int64
  999. }
  1000. // newRunIterator16 returns a new empty run container.
  1001. func (rc *runContainer16) newRunIterator16() *runIterator16 {
  1002. return &runIterator16{rc: rc, curIndex: -1}
  1003. }
  1004. // HasNext returns false if calling Next will panic. It
  1005. // returns true when there is at least one more value
  1006. // available in the iteration sequence.
  1007. func (ri *runIterator16) hasNext() bool {
  1008. if len(ri.rc.iv) == 0 {
  1009. return false
  1010. }
  1011. if ri.curIndex == -1 {
  1012. return true
  1013. }
  1014. return ri.curSeq+1 < ri.rc.cardinality()
  1015. }
  1016. // cur returns the current value pointed to by the iterator.
  1017. func (ri *runIterator16) cur() uint16 {
  1018. return ri.rc.iv[ri.curIndex].start + ri.curPosInIndex
  1019. }
  1020. // Next returns the next value in the iteration sequence.
  1021. func (ri *runIterator16) next() uint16 {
  1022. if !ri.hasNext() {
  1023. panic("no Next available")
  1024. }
  1025. if ri.curIndex >= int64(len(ri.rc.iv)) {
  1026. panic("runIterator.Next() going beyond what is available")
  1027. }
  1028. if ri.curIndex == -1 {
  1029. // first time is special
  1030. ri.curIndex = 0
  1031. } else {
  1032. ri.curPosInIndex++
  1033. if int64(ri.rc.iv[ri.curIndex].start)+int64(ri.curPosInIndex) == int64(ri.rc.iv[ri.curIndex].last())+1 {
  1034. ri.curPosInIndex = 0
  1035. ri.curIndex++
  1036. }
  1037. ri.curSeq++
  1038. }
  1039. return ri.cur()
  1040. }
  1041. // remove removes the element that the iterator
  1042. // is on from the run container. You can use
  1043. // Cur if you want to double check what is about
  1044. // to be deleted.
  1045. func (ri *runIterator16) remove() uint16 {
  1046. n := ri.rc.cardinality()
  1047. if n == 0 {
  1048. panic("runIterator.Remove called on empty runContainer16")
  1049. }
  1050. cur := ri.cur()
  1051. ri.rc.deleteAt(&ri.curIndex, &ri.curPosInIndex, &ri.curSeq)
  1052. return cur
  1053. }
  1054. type manyRunIterator16 struct {
  1055. rc *runContainer16
  1056. curIndex int64
  1057. curPosInIndex uint16
  1058. curSeq int64
  1059. }
  1060. func (rc *runContainer16) newManyRunIterator16() *manyRunIterator16 {
  1061. return &manyRunIterator16{rc: rc, curIndex: -1}
  1062. }
  1063. func (ri *manyRunIterator16) hasNext() bool {
  1064. if len(ri.rc.iv) == 0 {
  1065. return false
  1066. }
  1067. if ri.curIndex == -1 {
  1068. return true
  1069. }
  1070. return ri.curSeq+1 < ri.rc.cardinality()
  1071. }
  1072. // hs are the high bits to include to avoid needing to reiterate over the buffer in NextMany
  1073. func (ri *manyRunIterator16) nextMany(hs uint32, buf []uint32) int {
  1074. n := 0
  1075. if !ri.hasNext() {
  1076. return n
  1077. }
  1078. // start and end are inclusive
  1079. for n < len(buf) {
  1080. if ri.curIndex == -1 || int(ri.rc.iv[ri.curIndex].length-ri.curPosInIndex) <= 0 {
  1081. ri.curPosInIndex = 0
  1082. ri.curIndex++
  1083. if ri.curIndex == int64(len(ri.rc.iv)) {
  1084. break
  1085. }
  1086. buf[n] = uint32(ri.rc.iv[ri.curIndex].start) | hs
  1087. if ri.curIndex != 0 {
  1088. ri.curSeq += 1
  1089. }
  1090. n += 1
  1091. // not strictly necessarily due to len(buf)-n min check, but saves some work
  1092. continue
  1093. }
  1094. // add as many as you can from this seq
  1095. moreVals := minOfInt(int(ri.rc.iv[ri.curIndex].length-ri.curPosInIndex), len(buf)-n)
  1096. base := uint32(ri.rc.iv[ri.curIndex].start+ri.curPosInIndex+1) | hs
  1097. // allows BCE
  1098. buf2 := buf[n : n+moreVals]
  1099. for i := range buf2 {
  1100. buf2[i] = base + uint32(i)
  1101. }
  1102. // update values
  1103. ri.curPosInIndex += uint16(moreVals) //moreVals always fits in uint16
  1104. ri.curSeq += int64(moreVals)
  1105. n += moreVals
  1106. }
  1107. return n
  1108. }
  1109. // remove removes key from the container.
  1110. func (rc *runContainer16) removeKey(key uint16) (wasPresent bool) {
  1111. var index int64
  1112. var curSeq int64
  1113. index, wasPresent, _ = rc.search(int64(key), nil)
  1114. if !wasPresent {
  1115. return // already removed, nothing to do.
  1116. }
  1117. pos := key - rc.iv[index].start
  1118. rc.deleteAt(&index, &pos, &curSeq)
  1119. return
  1120. }
  1121. // internal helper functions
  1122. func (rc *runContainer16) deleteAt(curIndex *int64, curPosInIndex *uint16, curSeq *int64) {
  1123. rc.card--
  1124. *curSeq--
  1125. ci := *curIndex
  1126. pos := *curPosInIndex
  1127. // are we first, last, or in the middle of our interval16?
  1128. switch {
  1129. case pos == 0:
  1130. if int64(rc.iv[ci].length) == 0 {
  1131. // our interval disappears
  1132. rc.iv = append(rc.iv[:ci], rc.iv[ci+1:]...)
  1133. // curIndex stays the same, since the delete did
  1134. // the advance for us.
  1135. *curPosInIndex = 0
  1136. } else {
  1137. rc.iv[ci].start++ // no longer overflowable
  1138. rc.iv[ci].length--
  1139. }
  1140. case pos == rc.iv[ci].length:
  1141. // length
  1142. rc.iv[ci].length--
  1143. // our interval16 cannot disappear, else we would have been pos == 0, case first above.
  1144. *curPosInIndex--
  1145. // if we leave *curIndex alone, then Next() will work properly even after the delete.
  1146. default:
  1147. //middle
  1148. // split into two, adding an interval16
  1149. new0 := newInterval16Range(rc.iv[ci].start, rc.iv[ci].start+*curPosInIndex-1)
  1150. new1start := int64(rc.iv[ci].start+*curPosInIndex) + 1
  1151. if new1start > int64(MaxUint16) {
  1152. panic("overflow?!?!")
  1153. }
  1154. new1 := newInterval16Range(uint16(new1start), rc.iv[ci].last())
  1155. tail := append([]interval16{new0, new1}, rc.iv[ci+1:]...)
  1156. rc.iv = append(rc.iv[:ci], tail...)
  1157. // update curIndex and curPosInIndex
  1158. *curIndex++
  1159. *curPosInIndex = 0
  1160. }
  1161. }
  // have4Overlap16 reports whether the closed ranges [astart, alast]
  // and [bstart, blast] share at least one value.
  func have4Overlap16(astart, alast, bstart, blast int64) bool {
  	aReachesB := alast+1 > bstart
  	bReachesA := blast+1 > astart
  	return aReachesB && bReachesA
  }
  1168. func intersectWithLeftover16(astart, alast, bstart, blast int64) (isOverlap, isLeftoverA, isLeftoverB bool, leftoverstart int64, intersection interval16) {
  1169. if !have4Overlap16(astart, alast, bstart, blast) {
  1170. return
  1171. }
  1172. isOverlap = true
  1173. // do the intersection:
  1174. if bstart > astart {
  1175. intersection.start = uint16(bstart)
  1176. } else {
  1177. intersection.start = uint16(astart)
  1178. }
  1179. switch {
  1180. case blast < alast:
  1181. isLeftoverA = true
  1182. leftoverstart = blast + 1
  1183. intersection.length = uint16(blast) - intersection.start
  1184. case alast < blast:
  1185. isLeftoverB = true
  1186. leftoverstart = alast + 1
  1187. intersection.length = uint16(alast) - intersection.start
  1188. default:
  1189. // alast == blast
  1190. intersection.length = uint16(alast) - intersection.start
  1191. }
  1192. return
  1193. }
  1194. func (rc *runContainer16) findNextIntervalThatIntersectsStartingFrom(startIndex int64, key int64) (index int64, done bool) {
  1195. rc.myOpts.startIndex = startIndex
  1196. rc.myOpts.endxIndex = 0
  1197. w, _, _ := rc.search(key, &rc.myOpts)
  1198. // rc.search always returns w < len(rc.iv)
  1199. if w < startIndex {
  1200. // not found and comes before lower bound startIndex,
  1201. // so just use the lower bound.
  1202. if startIndex == int64(len(rc.iv)) {
  1203. // also this bump up means that we are done
  1204. return startIndex, true
  1205. }
  1206. return startIndex, false
  1207. }
  1208. return w, false
  1209. }
  1210. func sliceToString16(m []interval16) string {
  1211. s := ""
  1212. for i := range m {
  1213. s += fmt.Sprintf("%v: %s, ", i, m[i])
  1214. }
  1215. return s
  1216. }
  1217. // selectInt16 returns the j-th value in the container.
  1218. // We panic of j is out of bounds.
  1219. func (rc *runContainer16) selectInt16(j uint16) int {
  1220. n := rc.cardinality()
  1221. if int64(j) > n {
  1222. panic(fmt.Sprintf("Cannot select %v since Cardinality is %v", j, n))
  1223. }
  1224. var offset int64
  1225. for k := range rc.iv {
  1226. nextOffset := offset + rc.iv[k].runlen() + 1
  1227. if nextOffset > int64(j) {
  1228. return int(int64(rc.iv[k].start) + (int64(j) - offset))
  1229. }
  1230. offset = nextOffset
  1231. }
  1232. panic(fmt.Sprintf("Cannot select %v since Cardinality is %v", j, n))
  1233. }
  1234. // helper for invert
  1235. func (rc *runContainer16) invertlastInterval(origin uint16, lastIdx int) []interval16 {
  1236. cur := rc.iv[lastIdx]
  1237. if cur.last() == MaxUint16 {
  1238. if cur.start == origin {
  1239. return nil // empty container
  1240. }
  1241. return []interval16{newInterval16Range(origin, cur.start-1)}
  1242. }
  1243. if cur.start == origin {
  1244. return []interval16{newInterval16Range(cur.last()+1, MaxUint16)}
  1245. }
  1246. // invert splits
  1247. return []interval16{
  1248. newInterval16Range(origin, cur.start-1),
  1249. newInterval16Range(cur.last()+1, MaxUint16),
  1250. }
  1251. }
  1252. // invert returns a new container (not inplace), that is
  1253. // the inversion of rc. For each bit b in rc, the
  1254. // returned value has !b
  1255. func (rc *runContainer16) invert() *runContainer16 {
  1256. ni := len(rc.iv)
  1257. var m []interval16
  1258. switch ni {
  1259. case 0:
  1260. return &runContainer16{iv: []interval16{newInterval16Range(0, MaxUint16)}}
  1261. case 1:
  1262. return &runContainer16{iv: rc.invertlastInterval(0, 0)}
  1263. }
  1264. var invstart int64
  1265. ult := ni - 1
  1266. for i, cur := range rc.iv {
  1267. if i == ult {
  1268. // invertlastInteval will add both intervals (b) and (c) in
  1269. // diagram below.
  1270. m = append(m, rc.invertlastInterval(uint16(invstart), i)...)
  1271. break
  1272. }
  1273. // INVAR: i and cur are not the last interval, there is a next at i+1
  1274. //
  1275. // ........[cur.start, cur.last] ...... [next.start, next.last]....
  1276. // ^ ^ ^
  1277. // (a) (b) (c)
  1278. //
  1279. // Now: we add interval (a); but if (a) is empty, for cur.start==0, we skip it.
  1280. if cur.start > 0 {
  1281. m = append(m, newInterval16Range(uint16(invstart), cur.start-1))
  1282. }
  1283. invstart = int64(cur.last() + 1)
  1284. }
  1285. return &runContainer16{iv: m}
  1286. }
  1287. func (iv interval16) equal(b interval16) bool {
  1288. return iv.start == b.start && iv.length == b.length
  1289. }
  1290. func (iv interval16) isSuperSetOf(b interval16) bool {
  1291. return iv.start <= b.start && b.last() <= iv.last()
  1292. }
  1293. func (iv interval16) subtractInterval(del interval16) (left []interval16, delcount int64) {
  1294. isect, isEmpty := intersectInterval16s(iv, del)
  1295. if isEmpty {
  1296. return nil, 0
  1297. }
  1298. if del.isSuperSetOf(iv) {
  1299. return nil, iv.runlen()
  1300. }
  1301. switch {
  1302. case isect.start > iv.start && isect.last() < iv.last():
  1303. new0 := newInterval16Range(iv.start, isect.start-1)
  1304. new1 := newInterval16Range(isect.last()+1, iv.last())
  1305. return []interval16{new0, new1}, isect.runlen()
  1306. case isect.start == iv.start:
  1307. return []interval16{newInterval16Range(isect.last()+1, iv.last())}, isect.runlen()
  1308. default:
  1309. return []interval16{newInterval16Range(iv.start, isect.start-1)}, isect.runlen()
  1310. }
  1311. }
  1312. func (rc *runContainer16) isubtract(del interval16) {
  1313. origiv := make([]interval16, len(rc.iv))
  1314. copy(origiv, rc.iv)
  1315. n := int64(len(rc.iv))
  1316. if n == 0 {
  1317. return // already done.
  1318. }
  1319. _, isEmpty := intersectInterval16s(newInterval16Range(rc.iv[0].start, rc.iv[n-1].last()), del)
  1320. if isEmpty {
  1321. return // done
  1322. }
  1323. // INVAR there is some intersection between rc and del
  1324. istart, startAlready, _ := rc.search(int64(del.start), nil)
  1325. ilast, lastAlready, _ := rc.search(int64(del.last()), nil)
  1326. rc.card = -1
  1327. if istart == -1 {
  1328. if ilast == n-1 && !lastAlready {
  1329. rc.iv = nil
  1330. return
  1331. }
  1332. }
  1333. // some intervals will remain
  1334. switch {
  1335. case startAlready && lastAlready:
  1336. res0, _ := rc.iv[istart].subtractInterval(del)
  1337. // would overwrite values in iv b/c res0 can have len 2. so
  1338. // write to origiv instead.
  1339. lost := 1 + ilast - istart
  1340. changeSize := int64(len(res0)) - lost
  1341. newSize := int64(len(rc.iv)) + changeSize
  1342. // rc.iv = append(pre, caboose...)
  1343. // return
  1344. if ilast != istart {
  1345. res1, _ := rc.iv[ilast].subtractInterval(del)
  1346. res0 = append(res0, res1...)
  1347. changeSize = int64(len(res0)) - lost
  1348. newSize = int64(len(rc.iv)) + changeSize
  1349. }
  1350. switch {
  1351. case changeSize < 0:
  1352. // shrink
  1353. copy(rc.iv[istart+int64(len(res0)):], rc.iv[ilast+1:])
  1354. copy(rc.iv[istart:istart+int64(len(res0))], res0)
  1355. rc.iv = rc.iv[:newSize]
  1356. return
  1357. case changeSize == 0:
  1358. // stay the same
  1359. copy(rc.iv[istart:istart+int64(len(res0))], res0)
  1360. return
  1361. default:
  1362. // changeSize > 0 is only possible when ilast == istart.
  1363. // Hence we now know: changeSize == 1 and len(res0) == 2
  1364. rc.iv = append(rc.iv, interval16{})
  1365. // len(rc.iv) is correct now, no need to rc.iv = rc.iv[:newSize]
  1366. // copy the tail into place
  1367. copy(rc.iv[ilast+2:], rc.iv[ilast+1:])
  1368. // copy the new item(s) into place
  1369. copy(rc.iv[istart:istart+2], res0)
  1370. return
  1371. }
  1372. case !startAlready && !lastAlready:
  1373. // we get to discard whole intervals
  1374. // from the search() definition:
  1375. // if del.start is not present, then istart is
  1376. // set as follows:
  1377. //
  1378. // a) istart == n-1 if del.start is beyond our
  1379. // last interval16 in rc.iv;
  1380. //
  1381. // b) istart == -1 if del.start is before our first
  1382. // interval16 in rc.iv;
  1383. //
  1384. // c) istart is set to the minimum index of rc.iv
  1385. // which comes strictly before the del.start;
  1386. // so del.start > rc.iv[istart].last,
  1387. // and if istart+1 exists, then del.start < rc.iv[istart+1].startx
  1388. // if del.last is not present, then ilast is
  1389. // set as follows:
  1390. //
  1391. // a) ilast == n-1 if del.last is beyond our
  1392. // last interval16 in rc.iv;
  1393. //
  1394. // b) ilast == -1 if del.last is before our first
  1395. // interval16 in rc.iv;
  1396. //
  1397. // c) ilast is set to the minimum index of rc.iv
  1398. // which comes strictly before the del.last;
  1399. // so del.last > rc.iv[ilast].last,
  1400. // and if ilast+1 exists, then del.last < rc.iv[ilast+1].start
  1401. // INVAR: istart >= 0
  1402. pre := rc.iv[:istart+1]
  1403. if ilast == n-1 {
  1404. rc.iv = pre
  1405. return
  1406. }
  1407. // INVAR: ilast < n-1
  1408. lost := ilast - istart
  1409. changeSize := -lost
  1410. newSize := int64(len(rc.iv)) + changeSize
  1411. if changeSize != 0 {
  1412. copy(rc.iv[ilast+1+changeSize:], rc.iv[ilast+1:])
  1413. }
  1414. rc.iv = rc.iv[:newSize]
  1415. return
  1416. case startAlready && !lastAlready:
  1417. // we can only shrink or stay the same size
  1418. // i.e. we either eliminate the whole interval,
  1419. // or just cut off the right side.
  1420. res0, _ := rc.iv[istart].subtractInterval(del)
  1421. if len(res0) > 0 {
  1422. // len(res) must be 1
  1423. rc.iv[istart] = res0[0]
  1424. }
  1425. lost := 1 + (ilast - istart)
  1426. changeSize := int64(len(res0)) - lost
  1427. newSize := int64(len(rc.iv)) + changeSize
  1428. if changeSize != 0 {
  1429. copy(rc.iv[ilast+1+changeSize:], rc.iv[ilast+1:])
  1430. }
  1431. rc.iv = rc.iv[:newSize]
  1432. return
  1433. case !startAlready && lastAlready:
  1434. // we can only shrink or stay the same size
  1435. res1, _ := rc.iv[ilast].subtractInterval(del)
  1436. lost := ilast - istart
  1437. changeSize := int64(len(res1)) - lost
  1438. newSize := int64(len(rc.iv)) + changeSize
  1439. if changeSize != 0 {
  1440. // move the tail first to make room for res1
  1441. copy(rc.iv[ilast+1+changeSize:], rc.iv[ilast+1:])
  1442. }
  1443. copy(rc.iv[istart+1:], res1)
  1444. rc.iv = rc.iv[:newSize]
  1445. return
  1446. }
  1447. }
  1448. // compute rc minus b, and return the result as a new value (not inplace).
  1449. // port of run_container_andnot from CRoaring...
  1450. // https://github.com/RoaringBitmap/CRoaring/blob/master/src/containers/run.c#L435-L496
  1451. func (rc *runContainer16) AndNotRunContainer16(b *runContainer16) *runContainer16 {
  1452. if len(b.iv) == 0 || len(rc.iv) == 0 {
  1453. return rc
  1454. }
  1455. dst := newRunContainer16()
  1456. apos := 0
  1457. bpos := 0
  1458. a := rc
  1459. astart := a.iv[apos].start
  1460. alast := a.iv[apos].last()
  1461. bstart := b.iv[bpos].start
  1462. blast := b.iv[bpos].last()
  1463. alen := len(a.iv)
  1464. blen := len(b.iv)
  1465. for apos < alen && bpos < blen {
  1466. switch {
  1467. case alast < bstart:
  1468. // output the first run
  1469. dst.iv = append(dst.iv, newInterval16Range(astart, alast))
  1470. apos++
  1471. if apos < alen {
  1472. astart = a.iv[apos].start
  1473. alast = a.iv[apos].last()
  1474. }
  1475. case blast < astart:
  1476. // exit the second run
  1477. bpos++
  1478. if bpos < blen {
  1479. bstart = b.iv[bpos].start
  1480. blast = b.iv[bpos].last()
  1481. }
  1482. default:
  1483. // a: [ ]
  1484. // b: [ ]
  1485. // alast >= bstart
  1486. // blast >= astart
  1487. if astart < bstart {
  1488. dst.iv = append(dst.iv, newInterval16Range(astart, bstart-1))
  1489. }
  1490. if alast > blast {
  1491. astart = blast + 1
  1492. } else {
  1493. apos++
  1494. if apos < alen {
  1495. astart = a.iv[apos].start
  1496. alast = a.iv[apos].last()
  1497. }
  1498. }
  1499. }
  1500. }
  1501. if apos < alen {
  1502. dst.iv = append(dst.iv, newInterval16Range(astart, alast))
  1503. apos++
  1504. if apos < alen {
  1505. dst.iv = append(dst.iv, a.iv[apos:]...)
  1506. }
  1507. }
  1508. return dst
  1509. }
  1510. func (rc *runContainer16) numberOfRuns() (nr int) {
  1511. return len(rc.iv)
  1512. }
  1513. func (rc *runContainer16) containerType() contype {
  1514. return run16Contype
  1515. }
  1516. func (rc *runContainer16) equals16(srb *runContainer16) bool {
  1517. //p("both rc16")
  1518. // Check if the containers are the same object.
  1519. if rc == srb {
  1520. //p("same object")
  1521. return true
  1522. }
  1523. if len(srb.iv) != len(rc.iv) {
  1524. //p("iv len differ")
  1525. return false
  1526. }
  1527. for i, v := range rc.iv {
  1528. if v != srb.iv[i] {
  1529. //p("differ at iv i=%v, srb.iv[i]=%v, rc.iv[i]=%v", i, srb.iv[i], rc.iv[i])
  1530. return false
  1531. }
  1532. }
  1533. //p("all intervals same, returning true")
  1534. return true
  1535. }
上海开阖软件有限公司 沪ICP备12045867号-1