Skip to content

Commit 1787a49

Browse files
author
Fabian Widmann
committed
Added binary search as a way to find the starting index for the parser.
1 parent a948287 commit 1787a49

File tree

4 files changed

+106
-16
lines changed

4 files changed

+106
-16
lines changed

dwd_geo_test.php

+1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ function prettyPrint($obj)
2525
$coordinates = new Coordinate(48.398400, 9.091550);
2626

2727
$date = Carbon::parse('2017-09-17 00:01:00');
28+
$date = Carbon::instance($date)->setTimezone('utc');;
2829
prettyPrint("Checking for Coordinates: " . $coordinates->format(new GeoJSON()) . ", @ " . $date->format(DateTime::ATOM));
2930

3031

src/fwidm/dwdHourlyCrawler/hourly/DWDHourlyCrawler.php

+7-6
Original file line numberDiff line numberDiff line change
@@ -52,15 +52,17 @@ public function getAllStations(bool $active=false)
5252
*
5353
* This is important if a station's files are missing on the ftp.
5454
* @param array $nearestStations
55-
* @param DateTime $dateTime
55+
* @param DateTime $date
5656
* @param int $timeMinuteLimit
5757
* @return array [params, stations]
5858
*/
59-
public function getDataInInterval($coordinatesRequest, DateTime $dateTime, $timeMinuteLimit = 30, $sorted = true)
59+
public function getDataInInterval($coordinatesRequest, DateTime $date, $timeMinuteLimit = 30, $sorted = true)
6060
{
6161

6262
$parameters = [];
6363
$queriedStations = [];
64+
65+
$date = Carbon::instance($date)->setTimezone('utc');
6466
foreach ($this->services as $var => $hourlyService) {
6567
$stations = $this->getStations($hourlyService, true);
6668

@@ -88,14 +90,14 @@ public function getDataInInterval($coordinatesRequest, DateTime $dateTime, $time
8890
continue;
8991
}
9092

91-
$parameters[$var] = $this->retrieveData($content, $nearestStation, $coordinatesRequest, $hourlyService, $dateTime, $timeMinuteLimit);
93+
$parameters[$var] = $this->retrieveData($content, $nearestStation, $coordinatesRequest, $hourlyService, $date, $timeMinuteLimit);
9294

9395
//addStation
9496
if (count($parameters[$var]) > 0 && !isset($queriedStations['station-' . $nearestStation->getId()])) {
9597
$queriedStations['station-' . $nearestStation->getId()] = $nearestStation;
9698
}
9799

98-
if (isset($parameters[$var]))
100+
if (count($parameters[$var])>0)
99101
break;
100102
}
101103
}
@@ -163,8 +165,7 @@ public function getDataByDay(Coordinate $coordinatesRequest, DateTime $day)
163165
* @param bool $activeOnly
164166
* @return array
165167
*/
166-
public
167-
function getStations(AbstractHourlyService $controller, bool $activeOnly = false, bool $forceDownloadFile = false)
168+
public function getStations(AbstractHourlyService $controller, bool $activeOnly = false, bool $forceDownloadFile = false)
168169
{
169170
$downloadFile = false || $forceDownloadFile;
170171
$stationsFTPPath = DWDConfiguration::getHourlyConfiguration()->parameters;

src/fwidm/dwdHourlyCrawler/hourly/services/AbstractHourlyService.php

+90-6
Original file line numberDiff line numberDiff line change
@@ -44,14 +44,16 @@ public function getParameter(): string
4444
* Parse the textual representation of DWD Data, can be filtered by specifying before and after.
4545
* This means if you specify after - you will get timestamps after the specified time
4646
* If you also specify before you can pinpoint values.
47+
* @deprecated
4748
* @param String $content - Textual representation of a DWD Hourly/Recent pressure file.
48-
* @param DateTime|null $start - returns all values after the specific time
49-
* @param DateTime|null $end - returns all values after $after AND after if set.
49+
* @param Carbon|null $startDate - returns all values after the specific time
50+
* @param Carbon|null $endDate - returns all values after $startDate AND before $endDate if set.
5051
* @return array of parameters
5152
* @throws ParseError
5253
*/
53-
public function parseHourlyData(String $content, DWDStation $nearestStation, Coordinate $coordinate, DateTime $start = null, DateTime $end = null): array
54+
public function parseHourlyDataOld(String $content, DWDStation $nearestStation, Coordinate $coordinate, Carbon $startDate = null, Carbon $endDate = null): array
5455
{
56+
$time = microtime(true);
5557
$lines = explode('eor', $content);
5658
$data = array();
5759

@@ -61,7 +63,35 @@ public function parseHourlyData(String $content, DWDStation $nearestStation, Coo
6163
* 2. from this day, calculate the hour difference between requested and $start+$end
6264
* 3. jump to the specific lines
6365
* 4. parse
66+
* DOES NOT PROVIDE CORRECT DATA - the dwd files do skip missing data instead of providing "-999" as value, as such the optimization will not work.
6467
*/
68+
69+
// $newestDate = null;
70+
// //retrieve the latest line that contains a valid date
71+
// for ($i = count($lines) - 1; !isset($newestDate); $i++) {
72+
// $newestData = str_replace(' ', '', $lines[count($lines) - $i]);
73+
// $cols = explode(';', $newestData);
74+
// if (sizeof($cols) > 3){
75+
// $newestDate = Carbon::createFromFormat($this->getTimeFormat(), $cols[1], 'utc');
76+
// break;
77+
// }
78+
// }
79+
// /* @var $newestDate Carbon */
80+
// $start = min($newestDate->diffInHours($startDate), $newestDate->diffInHours($endDate));
81+
// $end = max($newestDate->diffInHours($startDate), $newestDate->diffInHours($endDate));
82+
// DWDUtil::log("MAXMIN", "start=" .$start."; end=".$end. "available Lines=".count($lines));
83+
// //Retrieve the rest of the data that is found between start and end.
84+
// for ($i = $start; $i<count($lines) && $i < $end; $i++) {
85+
// $lines[$i] = str_replace(' ', '', $lines[$i]);
86+
// $cols = explode(';', $lines[$i]);
87+
// $date = Carbon::createFromFormat($this->getTimeFormat(), $cols[1], 'utc');
88+
// if (isset($date)) {
89+
// $data[] = $this->createParameter($cols, $date, $nearestStation, $coordinate);
90+
// } else
91+
// throw new ParseError(self::class . " - Error while parsing date: col=" . $cols[1] . " | date=" . $date);
92+
// }
93+
94+
DWDUtil::log("PARSER", "DATE=[" . $endDate->toIso8601String() . "," . $startDate->toIso8601String() . "]");
6595
for ($i = sizeof($lines) - 1; $i > 0; $i--) {
6696
$lines[$i] = str_replace(' ', '', $lines[$i]);
6797

@@ -75,7 +105,7 @@ public function parseHourlyData(String $content, DWDStation $nearestStation, Coo
75105
switch (func_num_args()) {
76106
//$start is set
77107
case 4: {
78-
if ($date >= $start) {
108+
if ($date >= $startDate) {
79109
$temp = $this->createParameter($cols, $date, $nearestStation, $coordinate);
80110

81111
$data[] = $temp;
@@ -87,12 +117,12 @@ public function parseHourlyData(String $content, DWDStation $nearestStation, Coo
87117
}
88118
//$start & $end are set
89119
case 5: {
90-
if ($date <= $end && $date >= $start) {
120+
if ($date <= $endDate && $date >= $startDate) {
91121
$temp = $this->createParameter($cols, $date, $nearestStation, $coordinate);
92122

93123
$data[] = $temp;
94124
} else
95-
if ($date <= $start) {
125+
if ($date <= $startDate) {
96126
//break from loop and switch
97127
break 2;
98128
}
@@ -109,10 +139,64 @@ public function parseHourlyData(String $content, DWDStation $nearestStation, Coo
109139
} else
110140
throw new ParseError(self::class . " - Error while parsing date: col=" . $cols[1] . " | date=" . $date);
111141
}
142+
DWDUtil::log("PARSER", "RetCount=" . count($data));
143+
144+
DWDUtil::log("TIMER", "Duration=" . (microtime(true) - $time));
145+
146+
return $data;
147+
}
148+
149+
150+
/**
 * Parses the textual representation of a DWD hourly file and returns the values whose
 * timestamp lies within [$startDate, $endDate] (both bounds inclusive).
 *
 * The content is stripped of spaces and line breaks and split into records at ";eor".
 * binarySearch() supplies the index at which scanning starts, so the records are expected
 * to be ordered by ascending timestamp.
 *
 * @param String $content textual representation of a DWD hourly file
 * @param DWDStation $nearestStation station handed through to createParameter()
 * @param Coordinate $coordinate coordinate handed through to createParameter()
 * @param Carbon $startDate inclusive lower bound of the requested interval
 * @param Carbon $endDate inclusive upper bound of the requested interval
 * @return array values built by createParameter(), in file order
 * @throws ParseError if no record at or after the start time exists or a timestamp cannot be parsed
 */
public function parseHourlyData(String $content, DWDStation $nearestStation, Coordinate $coordinate, Carbon $startDate, Carbon $endDate): array
{
    $timeFormat = $this->getTimeFormat();
    $start = $startDate->format($timeFormat);

    $content = str_replace([" ", PHP_EOL], "", $content);
    $lines = explode(";eor", $content);
    $lineCount = count($lines);
    $data = [];

    // binarySearch() returns the index directly AFTER the first record that is not older
    // than the formatted start time. Step back by one so that a record lying exactly on
    // $startDate is not lost; records that are still too old are skipped in the loop.
    $firstIndex = max(0, (int)$this->binarySearch($start, $lines) - 1);

    for ($i = $firstIndex; $i < $lineCount; $i++) {
        $cols = explode(';', $lines[$i]);
        if (count($cols) < 2) {
            // no timestamp column, e.g. the remainder after the last ";eor"
            continue;
        }

        $date = Carbon::createFromFormat($timeFormat, $cols[1], 'utc');
        if (!$date) {
            throw new ParseError(self::class . " - Error while parsing date: col=" . $cols[1]);
        }

        if ($date < $startDate) {
            // still before the requested interval
            continue;
        }
        if ($date > $endDate) {
            // records are ascending, nothing after this point can match
            break;
        }

        $data[] = $this->createParameter($cols, $date, $nearestStation, $coordinate);
    }

    return $data;
}
115176

177+
/**
 * Binary search for the parser's starting index of $item in $array.
 *
 * Every entry of $array is treated as a ';'-separated line whose second column holds the
 * timestamp. Timestamps are compared after an (int) cast, and the lines have to be sorted
 * by ascending timestamp for the search to be meaningful. An entry without a second column
 * compares as 0. The entry at index 0 is never inspected.
 *
 * @param string|int $item timestamp to search for
 * @param array $array lines to search in
 * @return int the index directly after the first line whose timestamp is not smaller than $item
 * @throws ParseError if no line has a timestamp greater than or equal to $item
 */
private function binarySearch($item, $array)
{
    $count = count($array);
    $needle = (int)$item;
    $low = 0;
    $high = $count;

    while ($high - $low > 1) {
        // integer division: "/" would produce float bounds that are then used as array offsets
        $center = intdiv($high + $low, 2);
        $timestamp = (int)(explode(';', $array[$center])[1] ?? 0);

        if ($timestamp < $needle) {
            $low = $center;
        } else {
            $high = $center;
        }
    }

    if ($high === $count) {
        // $high was never lowered, so it points past the last line - there is no value to report
        throw new ParseError(self::class . " - Error while searching for position of item=" . $item . "; high=" . $high . "; lines=" . $count);
    }

    return $high + 1;
}
199+
116200
/**
117201
* Get the dateformat. default is in dwdHourly->parserSettings->dateFormat. Override as needed (ex. solar).
118202
* @return string

src/fwidm/dwdHourlyCrawler/util/FractalWrapper.php

+8-4
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
use League\Fractal\Manager;
1313
use League\Fractal\Resource\Collection;
1414
use League\Fractal\Resource\Item;
15+
use League\Fractal\Resource\NullResource;
16+
use League\Fractal\Resource\ResourceAbstract;
1517
use League\Fractal\Resource\ResourceInterface;
1618
use League\Fractal\Serializer\DataArraySerializer;
1719

@@ -28,11 +30,13 @@ class FractalWrapper
2830
/** Transform the given object or array with the given transformer to a Resource.
2931
* @param $obj
3032
* @param $transformer
31-
* @return ResourceInterface
33+
* @return ResourceAbstract
3234
* @throws DWDLibException
3335
*/
34-
public static function toResource($obj, $transformer): ResourceInterface
36+
public static function toResource($obj, $transformer): ResourceAbstract
3537
{
38+
if (!$obj)
39+
return new NullResource();
3640
$resource = null;
3741
try {
3842
if (is_array($obj)) {
@@ -51,7 +55,7 @@ public static function toResource($obj, $transformer): ResourceInterface
5155
* @param string $transformer
5256
* @return array
5357
*/
54-
public static function toArray(ResourceInterface $resource, $serializer = DataArraySerializer::class)
58+
public static function toArray(ResourceAbstract $resource, $serializer = DataArraySerializer::class)
5559
{
5660
$manager = new Manager();
5761
$manager->setSerializer(new $serializer());
@@ -63,7 +67,7 @@ public static function toArray(ResourceInterface $resource, $serializer = DataAr
6367
* @param string $serializer
6468
* @return string
6569
*/
66-
public static function toJson(ResourceInterface $resource, $options = 0, $serializer = DataArraySerializer::class)
70+
public static function toJson(ResourceAbstract $resource, $options = 0, $serializer = DataArraySerializer::class)
6771
{
6872
$manager = new Manager();
6973
$manager->setSerializer(new $serializer());

0 commit comments

Comments
 (0)