@@ -94,34 +94,34 @@ def smiles2mol(
94
94
95
95
Functional usage example:
96
96
97
- >>> import pandas as pd
98
- >>> import janitor.chemistry
97
+ >>> import pandas as pd
98
+ >>> import janitor.chemistry
99
99
100
- >>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
100
+ >>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
101
101
102
- >>> df = janitor.chemistry.smiles2mol(
103
- ... df=df,
104
- ... smiles_column_name='smiles',
105
- ... mols_column_name='mols'
106
- ... )
102
+ >>> df = janitor.chemistry.smiles2mol(
103
+ ... df=df,
104
+ ... smiles_column_name='smiles',
105
+ ... mols_column_name='mols'
106
+ ... )
107
107
108
- >>> df.mols[0].GetNumAtoms(), df.mols[0].GetNumBonds()
109
- (3, 2)
110
- >>> df.mols[1].GetNumAtoms(), df.mols[1].GetNumBonds()
111
- (5, 4)
108
+ >>> df.mols[0].GetNumAtoms(), df.mols[0].GetNumBonds()
109
+ (3, 2)
110
+ >>> df.mols[1].GetNumAtoms(), df.mols[1].GetNumBonds()
111
+ (5, 4)
112
112
113
113
Method chaining usage example:
114
114
115
- >>> import pandas as pd
116
- >>> import janitor.chemistry
115
+ >>> import pandas as pd
116
+ >>> import janitor.chemistry
117
117
118
- >>> df = df.smiles2mol(
119
- ... smiles_column_name='smiles',
120
- ... mols_column_name='rdkmol'
121
- ... )
118
+ >>> df = df.smiles2mol(
119
+ ... smiles_column_name='smiles',
120
+ ... mols_column_name='rdkmol'
121
+ ... )
122
122
123
- >>> df.rdkmol[0].GetNumAtoms(), df.rdkmol[0].GetNumBonds()
124
- (3, 2)
123
+ >>> df.rdkmol[0].GetNumAtoms(), df.rdkmol[0].GetNumBonds()
124
+ (3, 2)
125
125
126
126
A progress bar can optionally be used.
127
127
@@ -184,78 +184,78 @@ def morgan_fingerprint(
184
184
185
185
Functional usage example:
186
186
187
- >>> import pandas as pd
188
- >>> import janitor.chemistry
187
+ >>> import pandas as pd
188
+ >>> import janitor.chemistry
189
189
190
- >>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
190
+ >>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
191
191
192
- # For "counts" kind
193
- >>> morgans = janitor.chemistry.morgan_fingerprint(
194
- ... df=df.smiles2mol('smiles', 'mols'),
195
- ... mols_column_name='mols',
196
- ... radius=3, # Defaults to 3
197
- ... nbits=2048, # Defaults to 2048
198
- ... kind='counts' # Defaults to "counts"
199
- ... )
192
+ # For "counts" kind
193
+ >>> morgans = janitor.chemistry.morgan_fingerprint(
194
+ ... df=df.smiles2mol('smiles', 'mols'),
195
+ ... mols_column_name='mols',
196
+ ... radius=3, # Defaults to 3
197
+ ... nbits=2048, # Defaults to 2048
198
+ ... kind='counts' # Defaults to "counts"
199
+ ... )
200
200
201
- >>> set(morgans.iloc[0])
202
- {0.0, 1.0, 2.0}
201
+ >>> set(morgans.iloc[0])
202
+ {0.0, 1.0, 2.0}
203
203
204
- # For "bits" kind
205
- >>> morgans = janitor.chemistry.morgan_fingerprint(
206
- ... df=df.smiles2mol('smiles', 'mols'),
207
- ... mols_column_name='mols',
208
- ... radius=3, # Defaults to 3
209
- ... nbits=2048, # Defaults to 2048
210
- ... kind='bits' # Defaults to "counts"
211
- ... )
204
+ # For "bits" kind
205
+ >>> morgans = janitor.chemistry.morgan_fingerprint(
206
+ ... df=df.smiles2mol('smiles', 'mols'),
207
+ ... mols_column_name='mols',
208
+ ... radius=3, # Defaults to 3
209
+ ... nbits=2048, # Defaults to 2048
210
+ ... kind='bits' # Defaults to "counts"
211
+ ... )
212
212
213
- >>> set(morgans.iloc[0])
214
- {0.0, 1.0}
213
+ >>> set(morgans.iloc[0])
214
+ {0.0, 1.0}
215
215
216
216
Method chaining usage example:
217
217
218
- >>> import pandas as pd
219
- >>> import janitor.chemistry
220
-
221
- >>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
222
-
223
- # For "counts" kind
224
- >>> morgans = (
225
- ... df.smiles2mol('smiles', 'mols')
226
- ... .morgan_fingerprint(
227
- ... mols_column_name='mols',
228
- ... radius=3, # Defaults to 3
229
- ... nbits=2048, # Defaults to 2048
230
- ... kind='counts' # Defaults to "counts"
231
- ... )
232
- ... )
233
-
234
- >>> set(morgans.iloc[0])
235
- {0.0, 1.0, 2.0}
236
-
237
- # For "bits" kind
238
- >>> morgans = (
239
- ... df
240
- ... .smiles2mol('smiles', 'mols')
241
- ... .morgan_fingerprint(
242
- ... mols_column_name='mols',
243
- ... radius=3, # Defaults to 3
244
- ... nbits=2048, # Defaults to 2048
245
- ... kind='bits' # Defaults to "counts"
246
- ... )
247
- ... )
248
-
249
- >>> set(morgans.iloc[0])
250
- {0.0, 1.0}
218
+ >>> import pandas as pd
219
+ >>> import janitor.chemistry
220
+
221
+ >>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
222
+
223
+ # For "counts" kind
224
+ >>> morgans = (
225
+ ... df.smiles2mol('smiles', 'mols')
226
+ ... .morgan_fingerprint(
227
+ ... mols_column_name='mols',
228
+ ... radius=3, # Defaults to 3
229
+ ... nbits=2048, # Defaults to 2048
230
+ ... kind='counts' # Defaults to "counts"
231
+ ... )
232
+ ... )
233
+
234
+ >>> set(morgans.iloc[0])
235
+ {0.0, 1.0, 2.0}
236
+
237
+ # For "bits" kind
238
+ >>> morgans = (
239
+ ... df
240
+ ... .smiles2mol('smiles', 'mols')
241
+ ... .morgan_fingerprint(
242
+ ... mols_column_name='mols',
243
+ ... radius=3, # Defaults to 3
244
+ ... nbits=2048, # Defaults to 2048
245
+ ... kind='bits' # Defaults to "counts"
246
+ ... )
247
+ ... )
248
+
249
+ >>> set(morgans.iloc[0])
250
+ {0.0, 1.0}
251
251
252
252
If you wish to join the Morgan fingerprints back into the original
253
253
dataframe, this can be accomplished by doing a `join`,
254
254
because the indices are preserved:
255
255
256
- >>> joined = df.join(morgans)
257
- >>> len(joined.columns)
258
- 2050
256
+ >>> joined = df.join(morgans)
257
+ >>> len(joined.columns)
258
+ 2050
259
259
260
260
:param df: A pandas DataFrame.
261
261
:param mols_column_name: The name of the column that has the RDKit
@@ -324,47 +324,47 @@ def molecular_descriptors(
324
324
325
325
Functional usage example:
326
326
327
- >>> import pandas as pd
328
- >>> import janitor.chemistry
327
+ >>> import pandas as pd
328
+ >>> import janitor.chemistry
329
329
330
- >>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
330
+ >>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
331
331
332
- >>> mol_desc = (
333
- ... janitor.chemistry.molecular_descriptors(
334
- ... df=df.smiles2mol('smiles', 'mols'),
335
- ... mols_column_name='mols'
336
- ... )
337
- ... )
332
+ >>> mol_desc = (
333
+ ... janitor.chemistry.molecular_descriptors(
334
+ ... df=df.smiles2mol('smiles', 'mols'),
335
+ ... mols_column_name='mols'
336
+ ... )
337
+ ... )
338
338
339
- >>> mol_desc.TPSA
340
- 0 34.14
341
- 1 37.30
342
- Name: TPSA, dtype: float64
339
+ >>> mol_desc.TPSA
340
+ 0 34.14
341
+ 1 37.30
342
+ Name: TPSA, dtype: float64
343
343
344
344
Method chaining usage example:
345
345
346
- >>> import pandas as pd
347
- >>> import janitor.chemistry
346
+ >>> import pandas as pd
347
+ >>> import janitor.chemistry
348
348
349
- >>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
349
+ >>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
350
350
351
- >>> mol_desc = (
352
- ... df.smiles2mol('smiles', 'mols')
353
- ... .molecular_descriptors(mols_column_name='mols')
354
- ... )
351
+ >>> mol_desc = (
352
+ ... df.smiles2mol('smiles', 'mols')
353
+ ... .molecular_descriptors(mols_column_name='mols')
354
+ ... )
355
355
356
- >>> mol_desc.TPSA
357
- 0 34.14
358
- 1 37.30
359
- Name: TPSA, dtype: float64
356
+ >>> mol_desc.TPSA
357
+ 0 34.14
358
+ 1 37.30
359
+ Name: TPSA, dtype: float64
360
360
361
361
If you wish to join the molecular descriptors back into the original
362
362
dataframe, this can be accomplished by doing a `join`,
363
363
because the indices are preserved:
364
364
365
- >>> joined = df.join(mol_desc)
366
- >>> len(joined.columns)
367
- 41
365
+ >>> joined = df.join(mol_desc)
366
+ >>> len(joined.columns)
367
+ 41
368
368
369
369
:param df: A pandas DataFrame.
370
370
:param mols_column_name: The name of the column that has the RDKit mol
@@ -435,33 +435,33 @@ def maccs_keys_fingerprint(
435
435
436
436
Functional usage example:
437
437
438
- >>> import pandas as pd
439
- >>> import janitor.chemistry
438
+ >>> import pandas as pd
439
+ >>> import janitor.chemistry
440
440
441
- >>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
441
+ >>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
442
442
443
- >>> maccs = janitor.chemistry.maccs_keys_fingerprint(
444
- ... df=df.smiles2mol('smiles', 'mols'),
445
- ... mols_column_name='mols'
446
- ... )
443
+ >>> maccs = janitor.chemistry.maccs_keys_fingerprint(
444
+ ... df=df.smiles2mol('smiles', 'mols'),
445
+ ... mols_column_name='mols'
446
+ ... )
447
447
448
- >>> len(maccs.columns)
449
- 167
448
+ >>> len(maccs.columns)
449
+ 167
450
450
451
451
Method chaining usage example:
452
452
453
- >>> import pandas as pd
454
- >>> import janitor.chemistry
453
+ >>> import pandas as pd
454
+ >>> import janitor.chemistry
455
455
456
- >>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
456
+ >>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
457
457
458
- >>> maccs = (
459
- ... df.smiles2mol('smiles', 'mols')
460
- ... .maccs_keys_fingerprint(mols_column_name='mols')
461
- ... )
458
+ >>> maccs = (
459
+ ... df.smiles2mol('smiles', 'mols')
460
+ ... .maccs_keys_fingerprint(mols_column_name='mols')
461
+ ... )
462
462
463
- >>> len(maccs.columns)
464
- 167
463
+ >>> len(maccs.columns)
464
+ 167
465
465
466
466
If you wish to join the MACCS keys fingerprints back into the
467
467
original dataframe, this can be accomplished by doing a `join`,
0 commit comments