@@ -3,6 +3,7 @@ package vectorpipe.sources
33import java .io .{ByteArrayInputStream , IOException }
44import java .net .URI
55import java .nio .charset .StandardCharsets
6+ import java .time .Instant
67import java .util .zip .GZIPInputStream
78
89import cats .implicits ._
@@ -27,7 +28,7 @@ object ChangesetSource extends Logging {
/** Decodes a string-valued node into a `DateTime` by parsing it with `formatter`. */
private implicit val dateTimeDecoder: Decoder[DateTime] =
  Decoder.instance { cursor =>
    cursor.as[String].map(raw => DateTime.parse(raw, formatter))
  }
2930
30- def getSequence (baseURI : URI , sequence : Int ): Seq [Changeset ] = {
31+ def getChangeset (baseURI : URI , sequence : Int ): Seq [Changeset ] = {
3132 val s = f " $sequence%09d "
3233 val path = s " ${s.slice(0 , 3 )}/ ${s.slice(3 , 6 )}/ ${s.slice(6 , 9 )}.osm.gz "
3334
@@ -40,7 +41,7 @@ object ChangesetSource extends Logging {
4041 if (response.code == 404 ) {
4142 logDebug(s " $sequence is not yet available, sleeping. " )
4243 Thread .sleep(Delay .toMillis)
43- getSequence (baseURI, sequence)
44+ getChangeset (baseURI, sequence)
4445 } else {
4546 // NOTE: if diff bodies get really large, switch to a SAX parser to help with the memory footprint
4647 val bais = new ByteArrayInputStream (response.body)
@@ -62,25 +63,27 @@ object ChangesetSource extends Logging {
6263 case e : IOException =>
6364 logWarning(s " Error fetching changeset $sequence" , e)
6465 Thread .sleep(Delay .toMillis)
65- getSequence (baseURI, sequence)
66+ getChangeset (baseURI, sequence)
6667 }
6768 }
6869
/**
 * Replication state as decoded from the YAML documents fetched by `getCurrentSequence`
 * and `getSequence`: a sequence number and the `last_run` timestamp recorded with it.
 *
 * NOTE(review): the field names presumably have to match the keys of the state
 * documents for the decoder to work - confirm before renaming them.
 */
70+ case class Sequence (last_run : DateTime , sequence : Long )
71+
6972 @ memoize(maxSize = 1 , expiresAfter = 30 seconds)
70- def getCurrentSequence (baseURI : URI ): Option [Int ] = {
73+ def getCurrentSequence (baseURI : URI ): Option [Sequence ] = {
7174 try {
7275 val response =
7376 Http (baseURI.resolve(" state.yaml" ).toString).asString
7477
7578 val state = yaml.parser
7679 .parse(response.body)
7780 .leftMap(err => err : Error )
78- .flatMap(_.as[State ])
81+ .flatMap(_.as[Sequence ])
7982 .valueOr(throw _)
8083
8184 logDebug(s " $baseURI state: ${state.sequence} @ ${state.last_run}" )
8285
83- Some (state.sequence )
86+ Some (state)
8487 } catch {
8588 case err : Throwable =>
8689 logError(" Error fetching / parsing changeset state." , err)
@@ -89,5 +92,42 @@ object ChangesetSource extends Logging {
8992 }
9093 }
9194
92- case class State (last_run : DateTime , sequence : Int )
/**
 * Fetches and decodes the replication state file associated with a sequence number.
 *
 * The file is looked up under `baseURI` at `AAA/BBB/CCC.state.txt`, where `AAABBBCCC` is
 * `sequence + 1` zero-padded to nine digits.
 * NOTE(review): the `+ 1` offset is preserved from the original implementation; presumably
 * the state files are numbered one ahead of the sequence being asked for - confirm.
 *
 * @param baseURI  base URI of the changeset replication source
 * @param sequence sequence number to look up
 * @return the decoded state, or `None` when fetching or decoding fails (the failure is logged)
 */
def getSequence(baseURI: URI, sequence: Long): Option[Sequence] = {
  val padded = f"${sequence + 1}%09d"
  val path = s"${padded.slice(0, 3)}/${padded.slice(3, 6)}/${padded.slice(6, 9)}.state.txt"

  try {
    val response =
      Http(baseURI.resolve(path).toString).asString

    val state = yaml.parser
      .parse(response.body)
      .leftMap(err => err: Error)
      .flatMap(_.as[Sequence])
      .valueOr(throw _)

    Some(state)
  } catch {
    // Only recover from non-fatal failures; fatal errors (OutOfMemoryError, thread
    // interruption, ...) must propagate instead of being turned into None.
    case scala.util.control.NonFatal(err) =>
      logError("Error fetching / parsing changeset state.", err)

      None
  }
}
117+
/**
 * Estimates the sequence number that was current at `modifiedTime`.
 *
 * Takes the current state reported by `getCurrentSequence` and subtracts one sequence for
 * every whole minute between its `last_run` and `modifiedTime`.
 *
 * @param modifiedTime point in time to estimate a sequence number for
 * @param baseURI      base URI of the changeset replication source
 * @return the estimated sequence number
 * @throws NoSuchElementException if the current sequence cannot be determined
 */
def estimateSequenceNumber(modifiedTime: Instant, baseURI: URI): Long = {
  // Unwrap once, failing with a descriptive message rather than a bare Option.get error.
  val current = getCurrentSequence(baseURI).getOrElse(
    throw new NoSuchElementException(
      s"Unable to determine the current changeset sequence for $baseURI"
    )
  )

  val diffMinutes =
    (current.last_run.getMillis / 1000 - modifiedTime.getEpochSecond) / 60

  current.sequence - diffMinutes
}
123+
/**
 * Finds the sequence whose recorded `last_run` lines up with `modifiedTime`.
 *
 * Starts from `estimateSequenceNumber`, walks backwards while the state fetched for the
 * guess is after the target time, then walks forwards while it is before the target time.
 * Each step fetches one state file via `getSequence`; a state that has already been
 * fetched is reused rather than requested again.
 *
 * @param modifiedTime point in time to locate
 * @param baseURI      base URI of the changeset replication source
 * @return the `sequence` value of the state the search settles on
 * @throws NoSuchElementException if a state file needed by the search cannot be fetched or
 *                                decoded (e.g. the search runs past the available sequences)
 */
def findSequenceFor(modifiedTime: Instant, baseURI: URI): Long = {
  // Joda instants have millisecond precision, so converting via epoch millis is sufficient.
  val target = new org.joda.time.Instant(modifiedTime.toEpochMilli)

  // Fetch the state for a guess, failing with a descriptive message when unavailable.
  def stateAt(guess: Long): Sequence =
    getSequence(baseURI, guess).getOrElse(
      throw new NoSuchElementException(
        s"No changeset state available for sequence guess $guess at $baseURI"
      )
    )

  // Move backwards until the fetched state is no longer after the target.
  @scala.annotation.tailrec
  def stepBack(guess: Long, state: Sequence): (Long, Sequence) =
    if (state.last_run.isAfter(target)) stepBack(guess - 1, stateAt(guess - 1))
    else (guess, state)

  // Move forwards until the fetched state is no longer before the target.
  @scala.annotation.tailrec
  def stepForward(guess: Long, state: Sequence): Sequence =
    if (state.last_run.isBefore(target)) stepForward(guess + 1, stateAt(guess + 1))
    else state

  val initialGuess = estimateSequenceNumber(modifiedTime, baseURI)
  val (lowerGuess, lowerState) = stepBack(initialGuess, stateAt(initialGuess))

  stepForward(lowerGuess, lowerState).sequence
}
93133}
0 commit comments