daler · daler · Jan 26, 2026 · Jan 26, 2026 · Jan 26, 2026 · Jan 26, 2026
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -1,5 +1,13 @@
 name: main
-on: [push]
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    types:
+      - opened
+      - reopened
+      - synchronize
 jobs:
   build-and-test:
     strategy:

diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,4 @@
+env/
 *.swo
 *gfffeature.so
 *.swp

diff --git a/doc/source/changelog.rst b/doc/source/changelog.rst
@@ -3,6 +3,35 @@
 Change log
 ==========
 
+
+v0.14
+-----
+
+- If a value contained a semicolon there would be unexpected behavior (reported
+  in `#212 <https://github.com/daler/gffutils/issues/212>`__). This is solved
+  by adding a new entry to the dialect, ``semicolon in quotes``, and running
+  the necessary regular expression only -- thanks to @DevangThakkar for the
+  fix.
+- Refactored the attributes parsing to make it clearer to follow along, and
+  added more tests. The refactoring fixed some subtle bugs on corner cases:
+  - Previously, for features with repeated keys, the ``order`` key of dialects
+    would list the repeated keys each time which could result in undetermined
+    behavior. The ``order`` key is now unique and only the first occurrence of
+    a repeated key will be added to the order.
+  - Previously, the ``ensembl_gtf.txt`` example file had a leading *space* in
+    front of the attributes. This looks to be an error in the creation of the
+    example file in the first place, but had previously parsed fine. Now the
+    parser (correctly) mis-handles it. Since I'm unaware of any cases in the
+    wild that have a leading space, I actually consider the new parsing to be
+    more correct.
+  - Added tests to directly inspect the inferred dialects for the test cases.
+- CI, testing, and docs infrastructure updates (miniforge instead of
+  mambaforge; GitHub Action version bumps; skip biopython test if it's not
+  installed; reduce build errors for docs)
+- Fix `#224 <https://github.com/daler/gffutils/issues/224>`__, which was caused
+  by changes to the ``argh`` package used for the command-line tool.
+
+
 v0.13
 -----
 

diff --git a/doc/source/dialect.rst b/doc/source/dialect.rst
@@ -38,7 +38,8 @@ A GTF dialect might look like this::
      'multival separator': ',',
      'quoted GFF2 values': True,
      'repeated keys': False,
-     'trailing semicolon': True}
+     'trailing semicolon': True,
+     'semicolon in quotes': False}
 
 In contrast, a GFF dialect might look like this::
 
@@ -49,7 +50,9 @@ In contrast, a GFF dialect might look like this::
      'multival separator': ',',
      'quoted GFF2 values': False,
      'repeated keys': False,
-     'trailing semicolon': False}
+     'trailing semicolon': False,
+     'semicolon in quotes': False}
+
 
 As other real-world files are brought to the attention of the developers, it's
 likely that more entries will be added to the dialect.
diff --git a/doc/source/examples.rst b/doc/source/examples.rst
@@ -235,7 +235,7 @@ data upon import into the database:
 ...     return x
 
 
-Now we can supply this tranform function to :func:`create_db`:
+Now we can supply this transform function to :func:`create_db`:
 
 >>> fn = gffutils.example_filename('ensembl_gtf.txt')
 >>> db = gffutils.create_db(fn, ":memory:",
@@ -643,8 +643,8 @@ attributes to have the same format.  To help with this, we can use the
 >>> dialect = helpers.infer_dialect(
 ... 'Transcript "B0019.1" ; WormPep "WP:CE40797" ; Note "amx-2" ; Prediction_status "Partially_confirmed" ; Gene "WBGene00000138" ; CDS "B0019.1" ; WormPep "WP:CE40797" ; Note "amx-2" ; Prediction_status "Partially_confirmed" ; Gene "WBGene00000138"',
 ... )
->>> print(dialect)
-{'leading semicolon': False, 'trailing semicolon': False, 'quoted GFF2 values': True, 'field separator': ' ; ', 'keyval separator': ' ', 'multival separator': ',', 'fmt': 'gtf', 'repeated keys': True, 'order': ['Transcript', 'WormPep', 'Note', 'Prediction_status', 'Gene', 'CDS', 'WormPep', 'Note', 'Prediction_status', 'Gene']}
+>>> print({k: v for k, v in sorted(dialect.items())})
+{'field separator': ' ; ', 'fmt': 'gtf', 'keyval separator': ' ', 'leading semicolon': False, 'multival separator': ',', 'order': ['Transcript', 'WormPep', 'Note', 'Prediction_status', 'Gene', 'CDS'], 'quoted GFF2 values': True, 'repeated keys': True, 'semicolon in quotes': False, 'trailing semicolon': False}
 
 >>> db.dialect = dialect
 

diff --git a/gffutils/constants.py b/gffutils/constants.py
@@ -127,6 +127,12 @@
     # vs
     #   ID=001; Name=gene1
     "field separator": ";",
+    # Sometimes there are semicolons inside quotes that break things, e.g.,
+    #
+    #   note "Evidence 1a: Function1, Function2"
+    # vs
+    #   note "Evidence 1a: Function; PubMedId: 123, 456"
+    "semicolon in quotes": False,
     # Usually "=" for GFF3; " " for GTF, e.g.,
     #
     #   gene_id "GENE1"
-Original file line number
+Diff line change
@@ -1,3 +1,4 @@
+    env/
     *.swo
     *gfffeature.so
     *.swp
@@ Expand Down @@