Skip to content

Commit 0179699

Browse files
committed
Parser optimization for memory usage
1 parent cacb790 commit 0179699

File tree

2 files changed

+95
-38
lines changed

2 files changed

+95
-38
lines changed

src/EDI/Parser.php

Lines changed: 87 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
<?php
22
/**
33
* EDIFACT Messages Parser
4-
* (c)2016 Stefano Sabatini
4+
* (c)2018 Stefano Sabatini
55
*/
66

77
namespace EDI;
@@ -76,69 +76,98 @@ public function __construct($url = null)
7676
if ($this->unbChecked !== false) {
7777
$this->resetUNB();
7878
}
79+
7980
$this->errors=array();
81+
$this->parsedfile=array();
82+
8083
if ($url===null) {
8184
return;
8285
}
8386
if (is_array($url)) {
84-
$tmparr=$url;
87+
/**
88+
* Object constructed with an array as argument
89+
*/
8590
if (count($url) == 1) {
86-
$tmparr=$this->unwrap($url[0]);
91+
$url=$this->unwrap($url[0]);
8792
}
88-
$this->rawSegments = $tmparr;
89-
$this->parse($tmparr);
93+
$this->rawSegments = $url;
94+
$this->parse($url);
9095
} elseif (file_exists($url)) {
91-
$this->load($url); //FILE URL
96+
/**
97+
* Object constructed with a path to a file as argument
98+
*/
99+
$this->load($url);
92100
} else {
93-
$this->loadString($url); //STRING
101+
/**
102+
* Object constructed with a string as argument
103+
*/
104+
$this->loadString($url);
94105
}
95106
}
96107

97-
//Parse edi array
98-
public function parse($file2)
108+
/**
109+
* Parse edi array
110+
*
111+
* @param array $file2
112+
*
113+
* @return array
114+
*/
115+
public function parse(&$file2)
99116
{
100-
$i=0;
101-
$this->errors=array();
102-
foreach ($file2 as $x => &$line) {
103-
$i++;
104-
$line = preg_replace('#[\x00\r\n]#', '', $line); //null byte and carriage return removal (CR+LF)
117+
118+
//while ($line = array_shift($file2))
119+
$t = count($file2);
120+
for ($i = 1; $i <= $t; $i++) {
121+
$line = array_shift($file2);
122+
123+
/**
124+
* Null byte, carriage return and line feed removal (CR+LF)
125+
*/
126+
$line = preg_replace('#[\x00\r\n]#', '', $line);
105127
if (preg_match($this->stripChars, $line)) {
106128
$this->errors[]="There's a not printable character on line ".$i.": ". $line;
107129
}
108-
$line = preg_replace($this->stripChars, '', trim($line)); //basic sanitization, remove non printable chars
109-
if (strlen($line)<2) {
110-
unset($file2[$x]);
130+
131+
/**
132+
* Basic sanitization: remove non-printable characters
133+
*/
134+
$line = preg_replace($this->stripChars, '', trim($line));
135+
if (strlen($line) < 2) {
111136
continue;
112137
}
138+
113139
switch (substr($line, 0, 3)) {
114140
case "UNA":
115141
if (!$this->unaChecked) {
116142
$this->analyseUNA(substr($line, 4, 6));
117143
}
118-
unset($file2[$x]);
119144
break;
120145
case "UNB":
121146
$line=$this->splitSegment($line);
122147
if (!$this->unbChecked) {
123148
$this->analyseUNB($line[1]);
124149
}
150+
$this->parsedfile[] = $line;
125151
break;
126152
case "UNH":
127153
$line=$this->splitSegment($line);
128154
$this->analyseUNH($line);
155+
$this->parsedfile[] = $line;
129156
break;
130157
default:
131158
$line=$this->splitSegment($line);
159+
$this->parsedfile[] = $line;
132160
break;
133161
}
134162
}
135-
$this->parsedfile=array_values($file2); //reindex
136-
return $file2;
163+
return $this->parsedfile;
137164
}
138165

139166

140167
/**
141168
* Reset UNA's characters definition
169+
*
170+
* @return void
142171
*/
143172
private function resetUNA()
144173
{
@@ -153,6 +182,8 @@ private function resetUNA()
153182

154183
/**
155184
* Reset UNB's encoding definition
185+
*
186+
* @return void
156187
*/
157188
private function resetUNB()
158189
{
@@ -162,7 +193,10 @@ private function resetUNB()
162193

163194
/**
164195
* Read UNA's characters definition
196+
*
165197
* @param string $line : UNA definition line (without UNA tag). Example : :+.? '
198+
*
199+
* @return void
166200
*/
167201
public function analyseUNA($line)
168202
{
@@ -189,24 +223,33 @@ public function analyseUNA($line)
189223
}
190224

191225
/**
192-
* Read UNA's characters definition
193-
* @param string $line : UNB definition line (without UNB tag). Example UNOA:2
226+
* UNB line analysis
227+
*
228+
* @param string|array $encoding UNB definition line (without UNB tag), or an array whose first element is used as the encoding. Example UNOA:2
229+
*
230+
* @return void
194231
*/
195232
public function analyseUNB($encoding)
196233
{
197234
if (is_array($encoding)) {
198235
$encoding = $encoding[0];
199236
}
200237
$this->encoding = $encoding;
201-
if (isset($this->encodingToStripChars[$encoding])) { // we have a normed char set for your content
238+
/**
239+
* If there's a regex defined for this character set, use it
240+
*/
241+
if (isset($this->encodingToStripChars[$encoding])) {
202242
$this->setStripRegex($this->encodingToStripChars[$encoding]);
203243
}
204244
$this->unbChecked = true;
205245
}
206246

207247
/**
208248
* Identify message type
209-
* @param string $line : UNH segment
249+
*
250+
* @param string $line UNH segment
251+
*
252+
* @return void
210253
*/
211254
public function analyseUNH($line)
212255
{
@@ -222,7 +265,13 @@ public function analyseUNH($line)
222265
$this->messageDirectory = $lineElement[2];
223266
}
224267

225-
//unwrap string splitting rows on terminator (if not escaped)
268+
/**
269+
* Unwrap string splitting rows on terminator (if not escaped)
270+
*
271+
* @param string $string
272+
*
273+
* @return array
274+
*/
226275
private function unwrap($string)
227276
{
228277
if (!$this->unaChecked && substr($string, 0, 3) === "UNA") {
@@ -232,16 +281,15 @@ private function unwrap($string)
232281
$this->analyseUNB(preg_replace("#^UNB\+#", "", substr($string, 0, 8)));
233282
}
234283

235-
$file2=array();
236284
$file=preg_split(self::$DELIMITER."(?<!".$this->symbRel.")".$this->symbEnd.self::$DELIMITER."i", $string);
237285
$end = stripslashes($this->symbEnd);
238-
foreach ($file as &$line) {
239-
$temp=$line.$end;
240-
if ($temp!=$end) {
241-
$file2[]=$temp;
286+
foreach ($file as $fc => &$line) {
287+
if (trim($line) == '') {
288+
unset($file[$fc]);
242289
}
290+
$line .= $end;
243291
}
244-
return $file2;
292+
return $file;
245293
}
246294

247295
//Segments
@@ -268,8 +316,9 @@ private function splitData($str)
268316
$arr=preg_split(self::$DELIMITER."(?<!".$this->symbRel.")".$this->sepComp.self::$DELIMITER, $str); //split on sepComp if not escaped (negative lookbehind)
269317
if (count($arr)==1) {
270318
return preg_replace(self::$DELIMITER.$this->symbRel."(?=".$this->symbRel.")|".$this->symbRel."(?=".$this->sepData.")|".$this->symbRel."(?=".$this->sepComp.")|".$this->symbRel."(?=".$this->symbEnd.")".self::$DELIMITER, "", $str); //remove symbRel if not escaped
271-
} foreach ($arr as &$value) {
272-
$value=preg_replace(self::$DELIMITER.$this->symbRel."(?=".$this->symbRel.")|".$this->symbRel."(?=".$this->sepData.")|".$this->symbRel."(?=".$this->sepComp.")|".$this->symbRel."(?=".$this->symbEnd.")".self::$DELIMITER, "", $value);
319+
}
320+
foreach ($arr as &$value) {
321+
$value=preg_replace(self::$DELIMITER.$this->symbRel."(?=".$this->symbRel.")|".$this->symbRel."(?=".$this->sepData.")|".$this->symbRel."(?=".$this->sepComp.")|".$this->symbRel."(?=".$this->symbEnd.")".self::$DELIMITER, "", $value);
273322
}
274323
return $arr;
275324
}
@@ -300,11 +349,11 @@ public function load($url)
300349
}
301350

302351
//load the message from a string
303-
public function loadString($string)
352+
public function loadString(&$string)
304353
{
305-
$arr = $this->unwrap($string);
306-
$this->rawSegments = $arr;
307-
return $this->parse($arr);
354+
$string = $this->unwrap($string);
355+
$this->rawSegments = $string;
356+
return $this->parse($string);
308357
}
309358

310359
// change the default regex used for stripping invalid characters
@@ -322,4 +371,4 @@ public function getMessageDirectory()
322371
{
323372
return $this->messageDirectory;
324373
}
325-
}
374+
}

tests/EDITest/ParserTest.php

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,14 @@ public function testArrayInputNoErrors()
114114
$this->assertEmpty($result);
115115
}
116116

117+
public function testArrayInputEmptyLine()
118+
{
119+
$arr= ["LOC+9+VNSGN'", "", "LOC+11+ITGOA'"];
120+
$p=new Parser($arr);
121+
$result=$p->errors();
122+
$this->assertEmpty($result);
123+
}
124+
117125
public function testLoadFile()
118126
{
119127
$p=new Parser(__DIR__."/../files/example.edi");

0 commit comments

Comments
 (0)