Skip to content

Commit ae01b7d

Browse files
samukweku and ericmjl authored
[INF/DOC] Documentation Rendering fix (#1148)
* docs update
* doc fix

Co-authored-by: Eric Ma <[email protected]>
1 parent 51f20ca commit ae01b7d

File tree

4 files changed

+168
-165
lines changed

4 files changed

+168
-165
lines changed

janitor/biology.py

+26-26
Original file line numberDiff line numberDiff line change
@@ -41,32 +41,32 @@ def join_fasta(
4141
4242
Method chaining usage example:
4343
44-
>>> import tempfile
45-
>>> import pandas as pd
46-
>>> import janitor.biology
47-
48-
>>> tf = tempfile.NamedTemporaryFile()
49-
>>> tf.write('''>SEQUENCE_1
50-
... MTEITAAMVKELRESTGAGMMDCK
51-
... >SEQUENCE_2
52-
... SATVSEINSETDFVAKN'''.encode('utf8'))
53-
66
54-
>>> tf.seek(0)
55-
0
56-
57-
>>> df = pd.DataFrame({"sequence_accession":
58-
... ["SEQUENCE_1", "SEQUENCE_2", ]})
59-
60-
>>> df = df.join_fasta(
61-
... filename=tf.name,
62-
... id_col='sequence_accession',
63-
... column_name='sequence',
64-
... )
65-
66-
>>> df.sequence
67-
0 MTEITAAMVKELRESTGAGMMDCK
68-
1 SATVSEINSETDFVAKN
69-
Name: sequence, dtype: object
44+
>>> import tempfile
45+
>>> import pandas as pd
46+
>>> import janitor.biology
47+
48+
>>> tf = tempfile.NamedTemporaryFile()
49+
>>> tf.write('''>SEQUENCE_1
50+
... MTEITAAMVKELRESTGAGMMDCK
51+
... >SEQUENCE_2
52+
... SATVSEINSETDFVAKN'''.encode('utf8'))
53+
66
54+
>>> tf.seek(0)
55+
0
56+
57+
>>> df = pd.DataFrame({"sequence_accession":
58+
... ["SEQUENCE_1", "SEQUENCE_2", ]})
59+
60+
>>> df = df.join_fasta(
61+
... filename=tf.name,
62+
... id_col='sequence_accession',
63+
... column_name='sequence',
64+
... )
65+
66+
>>> df.sequence
67+
0 MTEITAAMVKELRESTGAGMMDCK
68+
1 SATVSEINSETDFVAKN
69+
Name: sequence, dtype: object
7070
7171
:param df: A pandas DataFrame.
7272
:param filename: Path to the FASTA file.

janitor/chemistry.py

+124-124
Original file line numberDiff line numberDiff line change
@@ -94,34 +94,34 @@ def smiles2mol(
9494
9595
Functional usage example:
9696
97-
>>> import pandas as pd
98-
>>> import janitor.chemistry
97+
>>> import pandas as pd
98+
>>> import janitor.chemistry
9999
100-
>>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
100+
>>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
101101
102-
>>> df = janitor.chemistry.smiles2mol(
103-
... df=df,
104-
... smiles_column_name='smiles',
105-
... mols_column_name='mols'
106-
... )
102+
>>> df = janitor.chemistry.smiles2mol(
103+
... df=df,
104+
... smiles_column_name='smiles',
105+
... mols_column_name='mols'
106+
... )
107107
108-
>>> df.mols[0].GetNumAtoms(), df.mols[0].GetNumBonds()
109-
(3, 2)
110-
>>> df.mols[1].GetNumAtoms(), df.mols[1].GetNumBonds()
111-
(5, 4)
108+
>>> df.mols[0].GetNumAtoms(), df.mols[0].GetNumBonds()
109+
(3, 2)
110+
>>> df.mols[1].GetNumAtoms(), df.mols[1].GetNumBonds()
111+
(5, 4)
112112
113113
Method chaining usage example:
114114
115-
>>> import pandas as pd
116-
>>> import janitor.chemistry
115+
>>> import pandas as pd
116+
>>> import janitor.chemistry
117117
118-
>>> df = df.smiles2mol(
119-
... smiles_column_name='smiles',
120-
... mols_column_name='rdkmol'
121-
... )
118+
>>> df = df.smiles2mol(
119+
... smiles_column_name='smiles',
120+
... mols_column_name='rdkmol'
121+
... )
122122
123-
>>> df.rdkmol[0].GetNumAtoms(), df.rdkmol[0].GetNumBonds()
124-
(3, 2)
123+
>>> df.rdkmol[0].GetNumAtoms(), df.rdkmol[0].GetNumBonds()
124+
(3, 2)
125125
126126
A progressbar can be optionally used.
127127
@@ -184,78 +184,78 @@ def morgan_fingerprint(
184184
185185
Functional usage example:
186186
187-
>>> import pandas as pd
188-
>>> import janitor.chemistry
187+
>>> import pandas as pd
188+
>>> import janitor.chemistry
189189
190-
>>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
190+
>>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
191191
192-
# For "counts" kind
193-
>>> morgans = janitor.chemistry.morgan_fingerprint(
194-
... df=df.smiles2mol('smiles', 'mols'),
195-
... mols_column_name='mols',
196-
... radius=3, # Defaults to 3
197-
... nbits=2048, # Defaults to 2048
198-
... kind='counts' # Defaults to "counts"
199-
... )
192+
# For "counts" kind
193+
>>> morgans = janitor.chemistry.morgan_fingerprint(
194+
... df=df.smiles2mol('smiles', 'mols'),
195+
... mols_column_name='mols',
196+
... radius=3, # Defaults to 3
197+
... nbits=2048, # Defaults to 2048
198+
... kind='counts' # Defaults to "counts"
199+
... )
200200
201-
>>> set(morgans.iloc[0])
202-
{0.0, 1.0, 2.0}
201+
>>> set(morgans.iloc[0])
202+
{0.0, 1.0, 2.0}
203203
204-
# For "bits" kind
205-
>>> morgans = janitor.chemistry.morgan_fingerprint(
206-
... df=df.smiles2mol('smiles', 'mols'),
207-
... mols_column_name='mols',
208-
... radius=3, # Defaults to 3
209-
... nbits=2048, # Defaults to 2048
210-
... kind='bits' # Defaults to "counts"
211-
... )
204+
# For "bits" kind
205+
>>> morgans = janitor.chemistry.morgan_fingerprint(
206+
... df=df.smiles2mol('smiles', 'mols'),
207+
... mols_column_name='mols',
208+
... radius=3, # Defaults to 3
209+
... nbits=2048, # Defaults to 2048
210+
... kind='bits' # Defaults to "counts"
211+
... )
212212
213-
>>> set(morgans.iloc[0])
214-
{0.0, 1.0}
213+
>>> set(morgans.iloc[0])
214+
{0.0, 1.0}
215215
216216
Method chaining usage example:
217217
218-
>>> import pandas as pd
219-
>>> import janitor.chemistry
220-
221-
>>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
222-
223-
# For "counts" kind
224-
>>> morgans = (
225-
... df.smiles2mol('smiles', 'mols')
226-
... .morgan_fingerprint(
227-
... mols_column_name='mols',
228-
... radius=3, # Defaults to 3
229-
... nbits=2048, # Defaults to 2048
230-
... kind='counts' # Defaults to "counts"
231-
... )
232-
... )
233-
234-
>>> set(morgans.iloc[0])
235-
{0.0, 1.0, 2.0}
236-
237-
# For "bits" kind
238-
>>> morgans = (
239-
... df
240-
... .smiles2mol('smiles', 'mols')
241-
... .morgan_fingerprint(
242-
... mols_column_name='mols',
243-
... radius=3, # Defaults to 3
244-
... nbits=2048, # Defaults to 2048
245-
... kind='bits' # Defaults to "counts"
246-
... )
247-
... )
248-
249-
>>> set(morgans.iloc[0])
250-
{0.0, 1.0}
218+
>>> import pandas as pd
219+
>>> import janitor.chemistry
220+
221+
>>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
222+
223+
# For "counts" kind
224+
>>> morgans = (
225+
... df.smiles2mol('smiles', 'mols')
226+
... .morgan_fingerprint(
227+
... mols_column_name='mols',
228+
... radius=3, # Defaults to 3
229+
... nbits=2048, # Defaults to 2048
230+
... kind='counts' # Defaults to "counts"
231+
... )
232+
... )
233+
234+
>>> set(morgans.iloc[0])
235+
{0.0, 1.0, 2.0}
236+
237+
# For "bits" kind
238+
>>> morgans = (
239+
... df
240+
... .smiles2mol('smiles', 'mols')
241+
... .morgan_fingerprint(
242+
... mols_column_name='mols',
243+
... radius=3, # Defaults to 3
244+
... nbits=2048, # Defaults to 2048
245+
... kind='bits' # Defaults to "counts"
246+
... )
247+
... )
248+
249+
>>> set(morgans.iloc[0])
250+
{0.0, 1.0}
251251
252252
If you wish to join the morgan fingerprints back into the original
253253
dataframe, this can be accomplished by doing a `join`,
254254
because the indices are preserved:
255255
256-
>>> joined = df.join(morgans)
257-
>>> len(joined.columns)
258-
2050
256+
>>> joined = df.join(morgans)
257+
>>> len(joined.columns)
258+
2050
259259
260260
:param df: A pandas DataFrame.
261261
:param mols_column_name: The name of the column that has the RDKIT
@@ -324,47 +324,47 @@ def molecular_descriptors(
324324
325325
Functional usage example:
326326
327-
>>> import pandas as pd
328-
>>> import janitor.chemistry
327+
>>> import pandas as pd
328+
>>> import janitor.chemistry
329329
330-
>>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
330+
>>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
331331
332-
>>> mol_desc = (
333-
... janitor.chemistry.molecular_descriptors(
334-
... df=df.smiles2mol('smiles', 'mols'),
335-
... mols_column_name='mols'
336-
... )
337-
... )
332+
>>> mol_desc = (
333+
... janitor.chemistry.molecular_descriptors(
334+
... df=df.smiles2mol('smiles', 'mols'),
335+
... mols_column_name='mols'
336+
... )
337+
... )
338338
339-
>>> mol_desc.TPSA
340-
0 34.14
341-
1 37.30
342-
Name: TPSA, dtype: float64
339+
>>> mol_desc.TPSA
340+
0 34.14
341+
1 37.30
342+
Name: TPSA, dtype: float64
343343
344344
Method chaining usage example:
345345
346-
>>> import pandas as pd
347-
>>> import janitor.chemistry
346+
>>> import pandas as pd
347+
>>> import janitor.chemistry
348348
349-
>>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
349+
>>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
350350
351-
>>> mol_desc = (
352-
... df.smiles2mol('smiles', 'mols')
353-
... .molecular_descriptors(mols_column_name='mols')
354-
... )
351+
>>> mol_desc = (
352+
... df.smiles2mol('smiles', 'mols')
353+
... .molecular_descriptors(mols_column_name='mols')
354+
... )
355355
356-
>>> mol_desc.TPSA
357-
0 34.14
358-
1 37.30
359-
Name: TPSA, dtype: float64
356+
>>> mol_desc.TPSA
357+
0 34.14
358+
1 37.30
359+
Name: TPSA, dtype: float64
360360
361361
If you wish to join the molecular descriptors back into the original
362362
dataframe, this can be accomplished by doing a `join`,
363363
because the indices are preserved:
364364
365-
>>> joined = df.join(mol_desc)
366-
>>> len(joined.columns)
367-
41
365+
>>> joined = df.join(mol_desc)
366+
>>> len(joined.columns)
367+
41
368368
369369
:param df: A pandas DataFrame.
370370
:param mols_column_name: The name of the column that has the RDKIT mol
@@ -435,33 +435,33 @@ def maccs_keys_fingerprint(
435435
436436
Functional usage example:
437437
438-
>>> import pandas as pd
439-
>>> import janitor.chemistry
438+
>>> import pandas as pd
439+
>>> import janitor.chemistry
440440
441-
>>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
441+
>>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
442442
443-
>>> maccs = janitor.chemistry.maccs_keys_fingerprint(
444-
... df=df.smiles2mol('smiles', 'mols'),
445-
... mols_column_name='mols'
446-
... )
443+
>>> maccs = janitor.chemistry.maccs_keys_fingerprint(
444+
... df=df.smiles2mol('smiles', 'mols'),
445+
... mols_column_name='mols'
446+
... )
447447
448-
>>> len(maccs.columns)
449-
167
448+
>>> len(maccs.columns)
449+
167
450450
451451
Method chaining usage example:
452452
453-
>>> import pandas as pd
454-
>>> import janitor.chemistry
453+
>>> import pandas as pd
454+
>>> import janitor.chemistry
455455
456-
>>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
456+
>>> df = pd.DataFrame({"smiles": ["O=C=O", "CCC(=O)O"]})
457457
458-
>>> maccs = (
459-
... df.smiles2mol('smiles', 'mols')
460-
... .maccs_keys_fingerprint(mols_column_name='mols')
461-
... )
458+
>>> maccs = (
459+
... df.smiles2mol('smiles', 'mols')
460+
... .maccs_keys_fingerprint(mols_column_name='mols')
461+
... )
462462
463-
>>> len(maccs.columns)
464-
167
463+
>>> len(maccs.columns)
464+
167
465465
466466
If you wish to join the maccs keys fingerprints back into the
467467
original dataframe, this can be accomplished by doing a `join`,

0 commit comments

Comments
 (0)