@@ -93,7 +93,7 @@ def read_csvs(
93
93
return dfs_dict
94
94
95
95
96
- def read_commandline (cmd : str , ** kwargs : Any ) -> pd . DataFrame :
96
+ def read_commandline (cmd : str , engine = "pandas" , ** kwargs : Any ) -> Mapping :
97
97
"""Read a CSV file based on a command-line command.
98
98
99
99
For example, you may wish to run the following command on `sep-quarter.csv`
@@ -111,26 +111,42 @@ def read_commandline(cmd: str, **kwargs: Any) -> pd.DataFrame:
111
111
```
112
112
113
113
This function assumes that your command line command will return
114
- an output that is parsable using `pandas.read_csv` and StringIO.
115
- We default to using `pd.read_csv` underneath the hood.
116
- Keyword arguments are passed through to read_csv .
114
+ an output that is parsable using the relevant engine and StringIO.
115
+ This function defaults to using `pd.read_csv` under the hood.
116
+ Keyword arguments are passed through as-is to the engine's CSV reader.
117
117
118
118
Args:
119
119
cmd: Shell command to preprocess a file on disk.
120
+ engine: DataFrame engine to process the output of the shell command.
121
+ Supported values are "pandas" (the default) and "polars".
120
122
**kwargs: Keyword arguments that are passed through to
121
- `pd.read_csv()`.
123
+ the engine's CSV reader (`pd.read_csv` or `pl.read_csv`).
124
+
122
125
123
126
Returns:
124
- A pandas DataFrame parsed from the stdout of the underlying
127
+ A pandas or polars DataFrame (depending on `engine`) parsed from the stdout of the underlying
125
128
shell.
126
129
"""
127
130
128
131
check ("cmd" , cmd , [str ])
132
+ if engine not in {"pandas" , "polars" }:
133
+ raise ValueError ("engine should be either pandas or polars." )
129
134
# adding check=True ensures that an explicit, clear error
130
135
# is raised, so that the user can see the reason for the failure
131
136
outcome = subprocess .run (
132
137
cmd , shell = True , capture_output = True , text = True , check = True
133
138
)
139
+ if engine == "polars" :
140
+ try :
141
+ import polars as pl
142
+ except ImportError :
143
+ import_message (
144
+ submodule = "polars" ,
145
+ package = "polars" ,
146
+ conda_channel = "conda-forge" ,
147
+ pip_install = True ,
148
+ )
149
+ return pl .read_csv (StringIO (outcome .stdout ), ** kwargs )
134
150
return pd .read_csv (StringIO (outcome .stdout ), ** kwargs )
135
151
136
152
0 commit comments