daler · daler · Jan 17, 2026 · Jan 17, 2026 · Jan 17, 2026 · Jan 17, 2026
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -1,5 +1,13 @@
 name: main
-on: [push]
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    types:
+      - opened
+      - reopened
+      - synchronize
 jobs:
   build-and-test:
     strategy:
@@ -18,9 +26,8 @@ jobs:
 
       - name: conda env
         run: |
-          wget -O Mambaforge.sh  "https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh"
-          curl -L -O "https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh"
-          bash Mambaforge.sh -b -p "${HOME}/conda"
+          wget -O Miniforge3.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh"
+          bash Miniforge3.sh -b -p "${HOME}/conda"
           source "${HOME}/conda/etc/profile.d/conda.sh"
           source "${HOME}/conda/etc/profile.d/mamba.sh"
           which conda
@@ -102,7 +109,7 @@ jobs:
 
       - name: push artifact
         if: ${{ (matrix.python-version == 3.9) }}
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: doc
           path: /tmp/docs

diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,4 @@
+env/
 *.swo
 *gfffeature.so
 *.swp

diff --git a/doc/source/api.rst b/doc/source/api.rst
@@ -117,10 +117,10 @@ Integration with other tools
     :toctree: autodocs
     :nosignatures:
 
-    gffutils.biopython_integration.to_seqfeature
-    gffutils.biopython_integration.from_seqfeature
-    gffutils.pybedtools_integration.tsses
-    gffutils.pybedtools_integration.to_bedtool
+    biopython_integration.to_seqfeature
+    biopython_integration.from_seqfeature
+    pybedtools_integration.tsses
+    pybedtools_integration.to_bedtool
 
 
 
@@ -131,10 +131,10 @@ Utilities
     :toctree: autodocs
     :nosignatures:
 
-    gffutils.helpers.asinterval
-    gffutils.helpers.merge_attributes
-    gffutils.helpers.sanitize_gff_db
-    gffutils.helpers.annotate_gff_db
-    gffutils.helpers.infer_dialect
-    gffutils.helpers.example_filename
-    gffutils.inspect.inspect
+    helpers.asinterval
+    helpers.merge_attributes
+    helpers.sanitize_gff_db
+    helpers.annotate_gff_db
+    helpers.infer_dialect
+    helpers.example_filename
+    inspect.inspect
diff --git a/doc/source/changelog.rst b/doc/source/changelog.rst
@@ -3,6 +3,35 @@
 Change log
 ==========
 
+
+v0.14
+-----
+
+- If a value contained a semicolon there would be unexpected behavior (reported
+  in `#212 <https://github.com/daler/gffutils/issues/212>`__). This is solved
+  by adding a new entry to the dialect, ``semicolon in quotes``, and running
+  the necessary regular expression only -- thanks to @DevangThakkar for the
+  fix.
+- Refactored the attributes parsing to make it clearer to follow along, and
+  added more tests. The refactoring fixed some subtle bugs on corner cases:
+  - Previously, for features with repeated keys, the ``order`` key of dialects
+    would list the repeated keys each time which could result in undetermined
+    behavior. The ``order`` key is now unique and only the first occurrence of
+    a repeated key will be added to the order.
+  - Previously, the ``ensembl_gtf.txt`` example file had a leading *space* in
+    front of the attributes. This looks to be an error in the creation of the
+    example file in the first place, but had previously parsed fine. Now the
+    parser (correctly) mis-handles it. Since I'm unaware of any cases in the
+    wild that have a leading space, I actually consider the new parsing to be
+    more correct.
+  - Added tests to directly inspect the inferred dialects for the test cases.
+- CI, testing, and docs infrastructure updates (miniforge instead of
+  mambaforge; GitHub Action version bumps; skip biopython test if it's not
+  installed; reduce build errors for docs)
+- Fix `#224 <https://github.com/daler/gffutils/issues/224>`__, which was caused
+  by changes to the ``argh`` package used for the command-line tool.
+
+
 v0.13
 -----
 

diff --git a/doc/source/conf.py b/doc/source/conf.py
@@ -53,5 +53,3 @@
 templates_path = ['_templates']
 exclude_patterns = []
 html_theme = 'sphinx_rtd_theme'
-html_static_path = ['_static']
-html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
diff --git a/doc/source/dialect.rst b/doc/source/dialect.rst
@@ -38,7 +38,8 @@ A GTF dialect might look like this::
      'multival separator': ',',
      'quoted GFF2 values': True,
      'repeated keys': False,
-     'trailing semicolon': True}
+     'trailing semicolon': True,
+     'semicolon in quotes': False}
 
 In contrast, a GFF dialect might look like this::
 
@@ -49,7 +50,9 @@ In contrast, a GFF dialect might look like this::
      'multival separator': ',',
      'quoted GFF2 values': False,
      'repeated keys': False,
-     'trailing semicolon': False}
+     'trailing semicolon': False,
+     'semicolon in quotes': False}
+
 
 As other real-world files are brought to the attention of the developers, it's
 likely that more entries will be added to the dialect.
diff --git a/doc/source/examples.rst b/doc/source/examples.rst
@@ -235,7 +235,7 @@ data upon import into the database:
 ...     return x
 
 
-Now we can supply this tranform function to :func:`create_db`:
+Now we can supply this transform function to :func:`create_db`:
 
 >>> fn = gffutils.example_filename('ensembl_gtf.txt')
 >>> db = gffutils.create_db(fn, ":memory:",
@@ -643,8 +643,8 @@ attributes to have the same format.  To help with this, we can use the
 >>> dialect = helpers.infer_dialect(
 ... 'Transcript "B0019.1" ; WormPep "WP:CE40797" ; Note "amx-2" ; Prediction_status "Partially_confirmed" ; Gene "WBGene00000138" ; CDS "B0019.1" ; WormPep "WP:CE40797" ; Note "amx-2" ; Prediction_status "Partially_confirmed" ; Gene "WBGene00000138"',
 ... )
->>> print(dialect)
-{'leading semicolon': False, 'trailing semicolon': False, 'quoted GFF2 values': True, 'field separator': ' ; ', 'keyval separator': ' ', 'multival separator': ',', 'fmt': 'gtf', 'repeated keys': True, 'order': ['Transcript', 'WormPep', 'Note', 'Prediction_status', 'Gene', 'CDS', 'WormPep', 'Note', 'Prediction_status', 'Gene']}
+>>> print({k: v for k, v in sorted(dialect.items())})
+{'field separator': ' ; ', 'fmt': 'gtf', 'keyval separator': ' ', 'leading semicolon': False, 'multival separator': ',', 'order': ['Transcript', 'WormPep', 'Note', 'Prediction_status', 'Gene', 'CDS'], 'quoted GFF2 values': True, 'repeated keys': True, 'semicolon in quotes': False, 'trailing semicolon': False}
 
 >>> db.dialect = dialect
 

diff --git a/gffutils/constants.py b/gffutils/constants.py
@@ -127,6 +127,12 @@
     # vs
     #   ID=001; Name=gene1
     "field separator": ";",
+    # Sometimes there are semicolons inside quotes that break things, e.g.,
+    #
+    #   note "Evidence 1a: Function1, Function2"
+    # vs
+    #   note "Evidence 1a: Function; PubMedId: 123, 456"
+    "semicolon in quotes": False,
     # Usually "=" for GFF3; " " for GTF, e.g.,
     #
     #   gene_id "GENE1"

diff --git a/gffutils/interface.py b/gffutils/interface.py
@@ -1285,7 +1285,7 @@ def create_introns(
 
             with open('tmp.gtf', 'w') as fout:
                 for intron in db.create_introns(**intron_kwargs):
-                    fout.write(str(intron) + "\n")
+                    fout.write(str(intron) + "\\n")
             db.update(gffutils.DataIterator('tmp.gtf'), **create_kwargs)
 
         """
-Original file line number
+Diff line change
@@ -1,3 +1,4 @@
+    env/
     *.swo
     *gfffeature.so
     *.swp
@@ Expand Down @@