diff --git a/mr-banks.html b/mr-banks.html index 0bd0ea5..e58a7c7 100644 --- a/mr-banks.html +++ b/mr-banks.html @@ -353,9 +353,10 @@ if __name__ == "__main__":
- Uma coisa que eu não mencionei: Não são todos os dados - passados para as janelas; dados são agrupados e então - passados para as janelas. + Quando os registros são passados para a janela, eles são + agrupados (ao invés de conter todos os registros que se + encaixam na janela, somente os valores agrupados + são guardados).
@@ -371,18 +372,18 @@ if __name__ == "__main__":source
- .filter(new Selector(processor)).name(s"Selecting ${processor * 100}% messages")
- .process(new ProcessMessages(brokenMessageTag)).name("Message Processor")
- .flatMap(new MessageSpliter).name("Get Logs") // get the lines in the message
- .filter(new LogBrokenFilter).name("Remove broken logs")
- .filter(new MissingClientFilter).name("Remove logs without clients")
- .flatMap(new MetricExtractor).name("Create metrics")
- .assignTimestampsAndWatermarks(new MetricTimestampAndWatermarks(watermarkTime)).name("Watermark")
- .keyBy(_.key)
- .window(TumblingEventTimeWindows.of(windowTime))
- .allowedLateness(latenessTime)
- .sideOutputLateData(lateMessageTag)
- .reduce(new MetricReducer(), new MetricWindowTimeMatcher()).name("Group metrics")
+ .filter(new Selector(processor)).name(s"Selecting ${processor * 100}% messages")
+ .process(new ProcessMessages(brokenMessageTag)).name("Message Processor")
+ .flatMap(new MessageSpliter).name("Get Logs") // get the lines in the message
+ .filter(new LogBrokenFilter).name("Remove broken logs")
+ .filter(new MissingClientFilter).name("Remove logs without clients")
+ .flatMap(new MetricExtractor).name("Create metrics")
+ .assignTimestampsAndWatermarks(new MetricTimestampAndWatermarks(watermarkTime)).name("Watermark")
+ .keyBy(_.key)
+ .window(TumblingEventTimeWindows.of(windowTime))
+ .allowedLateness(latenessTime)
+ .sideOutputLateData(lateMessageTag)
+ .reduce(new MetricReducer(), new MetricWindowTimeMatcher()).name("Group metrics")