Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,28 @@ If the JSON string is malformed, the `parse` function will throw an error:
loads("wrong") # MalformedJSON: Malformed node or string on line 1
```

### Handling text around JSON

Sometimes JSON might be embedded in other text. You can use `PREFIX` and `POSTFIX` options to handle this:

```python
from partial_json_parser import loads, PREFIX, POSTFIX

# Handle text before JSON
result = loads('This is your JSON: {"key": "value"}', PREFIX)
print(result) # Outputs: {'key': 'value'}

# Handle text after JSON
result = loads('{"key": "value"} - end of JSON', POSTFIX)
print(result) # Outputs: {'key': 'value'}

# Handle both
result = loads('Start of JSON: {"key": "value"} - end of JSON', PREFIX | POSTFIX)
print(result) # Outputs: {'key': 'value'}
```

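Note that `PREFIX` looks for the first `{` or `[` character and `POSTFIX` looks for the last `}` or `]` character to determine the JSON boundaries. Both options therefore only detect embedded objects and arrays; a bare string, number, or other scalar value surrounded by text is not recognized.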

## API Reference

### loads(json_string, [allow_partial], [parser])
Expand Down Expand Up @@ -149,7 +171,9 @@ Enum class that specifies what kind of partialness is allowed during JSON parsin
- `SPECIAL`: Allow all special values.
- `ATOM`: Allow all atomic values.
- `COLLECTION`: Allow all collection values.
- `ALL`: Allow all values.
- `PREFIX`: Allow text before the JSON string starts (e.g. `This is your JSON: {"key": "value"}`).
- `POSTFIX`: Allow text after the JSON string ends (e.g. `{"key": "value"} - end of JSON`).
- `ALL`: Allow all values, including `PREFIX` and `POSTFIX`.

## Testing

Expand Down
72 changes: 72 additions & 0 deletions src/partial_json_parser/core/myelin.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,78 @@ def join_closing_tokens(stack: List[Tuple[int, str]]):

def _find_json_end(text: str) -> int:
    """Return the index just past the top-level JSON collection opening *text*.

    The scan starts at the first non-whitespace character. If that character
    is not ``{`` or ``[``, or the collection is never closed (partial JSON),
    or a closing token does not match its opener, ``len(text)`` is returned
    so that the caller leaves the text untouched.
    """
    stripped = text.lstrip()
    if not stripped or stripped[0] not in "{[":
        return len(text)

    closers = {"{": "}", "[": "]"}
    expected = []  # closing tokens still owed, innermost last
    in_string = False
    escaped = False

    # Leading whitespace matches no branch below, so scanning from index 0
    # keeps the returned index relative to the original text.
    for index, char in enumerate(text):
        if in_string:
            # Brackets and braces inside string literals are plain data.
            if escaped:
                escaped = False
            elif char == "\\":
                escaped = True
            elif char == '"':
                in_string = False
        elif char == '"':
            in_string = True
        elif char in closers:
            expected.append(closers[char])
        elif char in "}]":
            if expected.pop() != char:
                # Mismatched closer: do not guess, let the parser report it.
                return len(text)
            if not expected:
                return index + 1

    return len(text)


def fix_fast(json_string: str, allow_partial: Union[Allow, int] = ALL):
    """Trim text surrounding the JSON (when allowed) and delegate to ``_fix``.

    Args:
        json_string: Possibly partial JSON text, optionally embedded in
            other text.
        allow_partial: ``Allow`` flags (or their integer value) selecting
            which kinds of partialness are accepted. ``PREFIX`` drops
            everything before the first ``{`` or ``[``; ``POSTFIX`` drops
            everything after the point where the top-level object or array
            closes.

    Returns:
        The result of ``_fix(json_string, allow, True)`` for the trimmed text.
    """
    allow = Allow(allow_partial)

    if PREFIX in allow:
        # The JSON is assumed to start at whichever opener appears first.
        openers = [
            position
            for position in (json_string.find("{"), json_string.find("["))
            if position != -1
        ]
        if openers:
            json_string = json_string[min(openers):]

    if POSTFIX in allow:
        # Cut only where the top-level collection actually closes. Cutting at
        # the last "}" or "]" found anywhere would discard trailing partial
        # content (e.g. '{"a": [1], "b": "x') and would treat brackets inside
        # string literals or inside the trailing text as JSON delimiters.
        json_string = json_string[:_find_json_end(json_string)]

    # Always enable STR when handling PREFIX/POSTFIX
    if PREFIX in allow or POSTFIX in allow:
        allow = Allow(allow | STR)

    return _fix(json_string, allow, True)


def fix_fast_old(json_string: str, allow_partial: Union[Allow, int] = ALL):
allow = Allow(allow_partial)
original_allow = allow

# Handle PREFIX by finding first { or [
if PREFIX in allow:
first_brace = json_string.find('{')
first_bracket = json_string.find('[')

if first_brace != -1 and (first_bracket == -1 or first_brace < first_bracket):
json_string = json_string[first_brace:]
elif first_bracket != -1:
json_string = json_string[first_bracket:]

# Handle POSTFIX by finding matching closing brace/bracket
if POSTFIX in allow:
# Find opening token
first_char = json_string[0] if json_string else ''
if first_char not in '{[':
# No valid JSON start found
return _fix(json_string, original_allow, True)

# Find matching closing token
closing_char = '}' if first_char == '{' else ']'
stack = []
in_string = False

for i, char in enumerate(json_string):
if char == '"' and (i == 0 or json_string[i-1] != '\\'):
in_string = not in_string
elif not in_string:
if char in '{[':
stack.append(char)
elif char in ']}':
if not stack:
break
if (char == '}' and stack[-1] == '{') or (char == ']' and stack[-1] == '['):
stack.pop()
if not stack: # Found matching closing token
json_string = json_string[:i+1]
break

# Remove PREFIX/POSTFIX from allow since we've handled them
allow = Allow(allow & ~(PREFIX | POSTFIX))

def is_escaped(index: int):
text_before = json_string[:index]
Expand Down
8 changes: 7 additions & 1 deletion src/partial_json_parser/core/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@ class Allow(IntFlag):
NAN = auto()
INFINITY = auto()
_INFINITY = auto()
PREFIX = auto()
POSTFIX = auto()

INF = INFINITY | _INFINITY
SPECIAL = NULL | BOOL | INF | NAN
ATOM = STR | NUM | SPECIAL
COLLECTION = ARR | OBJ
ALL = ATOM | COLLECTION
ALL = ATOM | COLLECTION | PREFIX | POSTFIX


STR = Allow.STR
Expand All @@ -35,6 +37,8 @@ class Allow(IntFlag):
ATOM = Allow.ATOM
COLLECTION = Allow.COLLECTION
ALL = Allow.ALL
PREFIX = Allow.PREFIX
POSTFIX = Allow.POSTFIX


__all__ = [
Expand All @@ -53,4 +57,6 @@ class Allow(IntFlag):
"ATOM",
"COLLECTION",
"ALL",
"PREFIX",
"POSTFIX",
]
Loading