Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,23 @@
Changelog
=========

0.4
-----

Changes:

- Add X12 parsing support (X12 -> JSON/Python dict).
- Add `SegmentParser` for parsing individual segments against a grammar.
- Add `X12Parser` for parsing complete X12 documents with auto-detection.
- Add `GrammarRegistry` for mapping segment IDs to grammar definitions.
- Add `ParsedSegment`, `ParsedElement`, `ParsedCompositeElement`, `ParsedComponent`, `ParsedLoop` data structures.
- Add `detect_delimiters()` for automatic ISA delimiter detection.
- Add `parse_x12()` and `parse_x12_to_json()` convenience functions.
- Add `LoopDefinition` for defining loop structures (start segment + children).
- Add loop-aware parsing: `X12Parser` groups segments into `ParsedLoop` objects when loop definitions are registered.
- Add `GrammarRegistry.register_loop()` which auto-registers all segment grammars in a loop definition.
- Add `GrammarRegistry.register_all()` for batch segment registration.

0.3
-----

Expand Down
110 changes: 110 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,116 @@ class StRenderer(SegmentRenderer):
return '{:04d}'.format(data.transaction_set_no) # return value should always be strings
```

### Parsing X12 to JSON

* Quick parse using the top-level API.
```python
from pyx12lib import parse_x12, parse_x12_to_json

x12_data = "ST*997*0001~SE*1*0001~"

# Parse to Python dict
data = parse_x12(x12_data)
# {'segments': [{'segment_id': 'ST', 'elements': [...]}, ...]}

# Parse to JSON string
json_str = parse_x12_to_json(x12_data)
```

* Parse a single segment with explicit grammar.
```python
from pyx12lib.core.parser import SegmentParser
from pyx12lib.common.envelope.grammar import StSegment

parser = SegmentParser("ST*997*0001~", grammar=StSegment)
result = parser.to_dict()
# {'segment_id': 'ST', 'elements': [
# {'reference_designator': 'ST01', 'name': 'Transaction Set Identifier Code', 'value': '997', ...},
# {'reference_designator': 'ST02', 'name': 'Transaction Set Control Number', 'value': '0001', ...},
# ]}
```

* Register custom segment grammars for parsing.
```python
from pyx12lib import GrammarRegistry, X12Parser
from pyx12lib.core.grammar import BaseSegment, Element, element, segment

class MySegment(BaseSegment):
segment_id = 'MY'
usage = segment.USAGE_MANDATORY
max_use = 1
elements = (
Element(
reference_designator='MY01',
name='My Field',
usage=element.USAGE_MANDATORY,
element_type=element.ELEMENT_TYPE_STRING,
minimum=1,
maximum=10,
),
)

registry = GrammarRegistry()
registry.register(MySegment)

parser = X12Parser(registry=registry)
data = parser.parse("MY*hello~MY*world~").to_dict()
```

* Parse with loop definitions to group related segments.
```python
from pyx12lib import X12Parser, LoopDefinition, GrammarRegistry
from pyx12lib.core.grammar import BaseSegment, Element, element, segment

class N1Segment(BaseSegment):
segment_id = 'N1'
usage = segment.USAGE_OPTIONAL
max_use = 99
elements = (
Element(reference_designator='N101', name='Entity Identifier Code',
usage=element.USAGE_MANDATORY, element_type=element.ELEMENT_TYPE_ID,
minimum=2, maximum=3),
)

class N2Segment(BaseSegment):
segment_id = 'N2'
usage = segment.USAGE_OPTIONAL
max_use = 2
elements = (
Element(reference_designator='N201', name='Name',
usage=element.USAGE_MANDATORY, element_type=element.ELEMENT_TYPE_STRING,
minimum=1, maximum=35),
)

registry = GrammarRegistry()
registry.register_loop(LoopDefinition(N1Segment, [N2Segment]))

parser = X12Parser(registry=registry)
data = parser.parse("N1*CA~N1*SH~N2*ACME CORP~N1*CN~").to_dict()
# {'segments': [
# {'loop_id': 'N1', 'segments': [{'segment_id': 'N1', 'elements': [...]}]},
# {'loop_id': 'N1', 'segments': [
# {'segment_id': 'N1', 'elements': [...]},
# {'segment_id': 'N2', 'elements': [...]},
# ]},
# {'loop_id': 'N1', 'segments': [{'segment_id': 'N1', 'elements': [...]}]},
# ]}
```

* Auto-detect delimiters from the ISA header.
```python
from pyx12lib import parse_x12

# Delimiters are automatically detected from the ISA segment
x12_data = (
"ISA*00*          *00*          *ZZ*SENDER         "
"*ZZ*RECEIVER       *210101*1200*^*00501*000000001*0*P*>~"
"GS*FA*SENDER*RECEIVER*20210101*1200*1*X*005010~"
"ST*997*0001~SE*1*0001~GE*1*1~IEA*1*000000001~"
)
data = parse_x12(x12_data)
```

---
## Test
```bash
Expand Down
35 changes: 35 additions & 0 deletions pyx12lib/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from pyx12lib.core.grammar.loop import LoopDefinition
from pyx12lib.core.parser import SegmentParser, X12Parser, X12ParseResult
from pyx12lib.core.registry import GrammarRegistry, create_default_registry
from pyx12lib.core.delimiters import detect_delimiters, Delimiters


def parse_x12(x12_string, registry=None):
    """Convert a raw X12 string into a plain Python dict.

    Args:
        x12_string: Raw X12 EDI string.
        registry: Optional GrammarRegistry handed to ``X12Parser``. Uses
            default envelope segments (ISA/IEA/GS/GE/ST/SE) if not provided.

    Returns:
        Dict with 'segments' key containing list of parsed segment dicts.
    """
    parsed = X12Parser(registry=registry).parse(x12_string)
    return parsed.to_dict()


def parse_x12_to_json(x12_string, indent=None, registry=None):
    """Convert a raw X12 string into its JSON text representation.

    Args:
        x12_string: Raw X12 EDI string.
        indent: JSON indentation level. None for compact output.
        registry: Optional GrammarRegistry handed to ``X12Parser``. Uses
            default envelope segments (ISA/IEA/GS/GE/ST/SE) if not provided.

    Returns:
        JSON string representation of the parsed X12 data.
    """
    parsed = X12Parser(registry=registry).parse(x12_string)
    return parsed.to_json(indent=indent)
94 changes: 94 additions & 0 deletions pyx12lib/core/delimiters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
"""Delimiter detection from ISA segment header.

In X12, the ISA segment defines the delimiters used throughout the interchange:
- Character at index 3 (zero-based, immediately after the ISA tag) is the element delimiter (typically '*')
- ISA16 value is the component separator (typically '^')
- Character immediately after the ISA segment is the segment terminator (typically '~')
"""

from pyx12lib.core.grammar.segment import ELEMENT_DELIMITER, SEGMENT_TERMINATOR
from pyx12lib.core.grammar.element import COMPONENT_DELIMITER

# ISA segment is always exactly 106 characters (including the segment terminator)
ISA_SEGMENT_LENGTH = 106


class Delimiters(object):
    """Container for X12 delimiter characters.

    Instances compare by value: two ``Delimiters`` are equal when all three
    characters match. Hashing is consistent with equality, so instances can
    be used in sets and as dict keys (do not reassign attributes while an
    instance is stored in such a container).

    Args:
        element_delimiter: Character separating elements within a segment.
        component_delimiter: Character separating components of a
            composite element.
        segment_terminator: Character that ends each segment.
    """

    def __init__(self, element_delimiter, component_delimiter, segment_terminator):
        self.element_delimiter = element_delimiter
        self.component_delimiter = component_delimiter
        self.segment_terminator = segment_terminator

    def _as_tuple(self):
        """Return the three delimiters as a tuple, the basis for eq/hash."""
        return (
            self.element_delimiter,
            self.component_delimiter,
            self.segment_terminator,
        )

    def __eq__(self, other):
        if not isinstance(other, Delimiters):
            return NotImplemented
        return self._as_tuple() == other._as_tuple()

    def __ne__(self, other):
        # Python 2 does not derive ``!=`` from ``__eq__``; define it
        # explicitly so both operators always agree.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        # Defining ``__eq__`` alone makes the class unhashable on Python 3;
        # hash the same tuple that equality compares.
        return hash(self._as_tuple())

    def __repr__(self):
        return "Delimiters(element='{}', component='{}', terminator='{}')".format(
            self.element_delimiter, self.component_delimiter, self.segment_terminator
        )


# Delimiter set assembled from the grammar-level constants imported at the
# top of this module (element/segment from grammar.segment, component from
# grammar.element).
# NOTE(review): presumably the fallback when no ISA header is available for
# detection -- confirm against the parser.
DEFAULT_DELIMITERS = Delimiters(
    element_delimiter=ELEMENT_DELIMITER,
    component_delimiter=COMPONENT_DELIMITER,
    segment_terminator=SEGMENT_TERMINATOR,
)


def detect_delimiters(raw_x12):
    """Detect delimiters from the ISA segment header.

    Leading whitespace is ignored. The element delimiter is the character
    right after the ``ISA`` tag, the segment terminator is the last character
    of the fixed-length ISA segment, and the component delimiter is the
    character following the 16th element delimiter (the ISA16 value).

    Args:
        raw_x12: Raw X12 string starting with ISA.

    Returns:
        Delimiters object with the detected characters.

    Raises:
        ValueError: If the string doesn't start with ISA, is too short, or
            does not contain 16 element delimiters (followed by an ISA16
            value) inside the ISA segment.
    """
    stripped = raw_x12.lstrip()
    if not stripped.startswith('ISA'):
        raise ValueError("X12 data must start with ISA segment for delimiter detection")

    if len(stripped) < ISA_SEGMENT_LENGTH:
        raise ValueError(
            "ISA segment requires at least {} characters, got {}".format(
                ISA_SEGMENT_LENGTH, len(stripped)
            )
        )

    element_delimiter = stripped[3]
    segment_terminator = stripped[ISA_SEGMENT_LENGTH - 1]

    # Only the ISA segment itself (everything before its terminator) may be
    # searched. Scanning the whole document would let a malformed header
    # pick up a "component delimiter" from a later segment instead of
    # failing.
    isa_body = stripped[:ISA_SEGMENT_LENGTH - 1]

    # ISA has exactly 16 elements, hence 16 element delimiters; ISA16 (the
    # component separator) is the character right after the 16th one.
    delimiter_count = 0
    for index, char in enumerate(isa_body):
        if char != element_delimiter:
            continue
        delimiter_count += 1
        if delimiter_count == 16:
            if index + 1 >= len(isa_body):
                raise ValueError(
                    "ISA segment truncated after 16th element delimiter"
                )
            component_delimiter = isa_body[index + 1]
            break
    else:
        raise ValueError("Could not find 16 element delimiters in ISA segment")

    return Delimiters(
        element_delimiter=element_delimiter,
        component_delimiter=component_delimiter,
        segment_terminator=segment_terminator,
    )
1 change: 1 addition & 0 deletions pyx12lib/core/grammar/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from .element import Component, CompositeElement, Element, NotUsedElement
from .loop import LoopDefinition
from .segment import BaseSegment
90 changes: 90 additions & 0 deletions pyx12lib/core/grammar/loop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
from pyx12lib.core.grammar.segment import BaseSegment


class LoopDefinition(object):
    """Defines a loop structure for X12 parsing.

    A loop starts with a specific segment type and contains child segments
    and/or nested loop definitions.

    Args:
        start_segment: BaseSegment subclass that triggers a new loop instance.
        children: Iterable of BaseSegment subclasses or nested LoopDefinition
            objects. It is copied at construction time, so later changes to
            the caller's collection do not affect this definition.

    Raises:
        TypeError: If start_segment is not a BaseSegment subclass.
        TypeError: If any child is neither a BaseSegment subclass nor LoopDefinition.
    """

    def __init__(self, start_segment, children=None):
        if not _is_segment_class(start_segment):
            raise TypeError(
                "start_segment must be a BaseSegment subclass, got {}".format(
                    type(start_segment)
                )
            )

        self._start_segment = start_segment
        # Snapshot the children: keeping the caller's object by reference
        # would let later mutation desynchronise it from the derived
        # collections below, and a one-shot iterable would be left exhausted.
        self._children = list(children) if children else []

        self._child_segment_grammars = []
        self._child_loops = []

        for child in self._children:
            if isinstance(child, LoopDefinition):
                self._child_loops.append(child)
            elif _is_segment_class(child):
                self._child_segment_grammars.append(child)
            else:
                raise TypeError(
                    "Each child must be a BaseSegment subclass or "
                    "LoopDefinition, got {}".format(type(child))
                )

        # Precomputed for constant-time membership checks in is_child().
        self._child_segment_ids = frozenset(
            g.segment_id for g in self._child_segment_grammars
        )

    def __repr__(self):
        return "LoopDefinition(start_segment={!r}, child_segments={!r}, child_loops={!r})".format(
            self._start_segment.segment_id,
            [g.segment_id for g in self._child_segment_grammars],
            self._child_loops,
        )

    @property
    def loop_id(self):
        """Identifier of the loop: the segment ID of its start segment."""
        return self._start_segment.segment_id

    @property
    def start_segment_id(self):
        """Segment ID of the grammar that opens a new loop instance."""
        return self._start_segment.segment_id

    @property
    def start_segment_grammar(self):
        """Grammar class of the start segment."""
        return self._start_segment

    @property
    def child_segment_ids(self):
        """Frozenset of segment IDs of the direct child segments."""
        return self._child_segment_ids

    @property
    def child_segment_grammars(self):
        """Copy of the list of direct child segment grammar classes."""
        return list(self._child_segment_grammars)

    @property
    def child_loops(self):
        """Copy of the list of directly nested LoopDefinition objects."""
        return list(self._child_loops)

    def is_start(self, segment_id):
        """Return True if segment_id is this loop's start segment ID."""
        return segment_id == self._start_segment.segment_id

    def is_child(self, segment_id):
        """Return True if segment_id is a direct child segment of this loop.

        Segments that belong only to nested loops are not considered.
        """
        return segment_id in self._child_segment_ids

    def all_segment_grammars(self):
        """Yield all grammar classes recursively (start + children + nested)."""
        yield self._start_segment
        for grammar in self._child_segment_grammars:
            yield grammar
        for child_loop in self._child_loops:
            for grammar in child_loop.all_segment_grammars():
                yield grammar


def _is_segment_class(obj):
"""Check if obj is a class that is a subclass of BaseSegment."""
return isinstance(obj, type) and issubclass(obj, BaseSegment)
Loading