Skip to content

Commit bf55784

Browse files
committed
feat: local config
introduce pydantic config for each ProfileReport object
1 parent 6fc0e7f commit bf55784

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

82 files changed

+1934
-1697
lines changed

docsrc/source/conf.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,13 @@ def _GetApiWrapperVersion():
4444
# ones.
4545
extensions = [
4646
"recommonmark",
47-
# "sphinx_multiversion",
4847
"sphinx.ext.autodoc",
4948
"sphinx.ext.autosummary",
5049
"sphinx.ext.coverage",
5150
"sphinx.ext.napoleon",
5251
"sphinx_autodoc_typehints",
5352
"sphinx.ext.viewcode",
53+
"sphinx-pydantic",
5454
]
5555

5656
# Add any paths that contain templates here, relative to this directory.

docsrc/source/pages/advanced_usage.rst

+53-81
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ The configuration can be changed in the following ways:
2121
profile = df.profile_report(title="Pandas Profiling Report", pool_size=1)
2222
2323
# Change the config after
24-
profile.set_variable("html.minify_html", False)
24+
profile.config.html.minify_html = False
2525
2626
profile.to_file("output.html")
2727
@@ -38,26 +38,24 @@ Variable summary settings
3838
:caption: Configuration example
3939
4040
profile = df.profile_report(
41-
sort='ascending',
42-
vars={
43-
'num':{'low_categorical_threshold': 0},
44-
'cat':{
45-
'length':True,
46-
'characters':False,
47-
'words':False,
48-
'n_obs': 5,
49-
}
50-
}
51-
)
52-
53-
profile.set_variable('variables.descriptions',
54-
{
55-
'files': 'Files in the filesystem',
56-
'datec': 'Creation date',
57-
'datem': 'Modification date',
41+
sort='ascending',
42+
vars={
43+
'num':{'low_categorical_threshold': 0},
44+
'cat':{
45+
'length':True,
46+
'characters':False,
47+
'words':False,
48+
'n_obs': 5,
5849
}
50+
}
5951
)
6052
53+
profile.config.variables.descriptions = {
54+
'files': 'Files in the filesystem',
55+
'datec': 'Creation date',
56+
'datem': 'Modification date',
57+
}
58+
6159
profile.to_file("report.html")
6260
6361
@@ -73,10 +71,10 @@ Missing data overview plots
7371
:caption: Configuration example: disable heatmap and dendrogram for large datasets
7472
7573
profile = df.profile_report(
76-
missing_diagrams={
77-
'heatmap': False,
78-
'dendrogram': False,
79-
}
74+
missing_diagrams={
75+
'heatmap': False,
76+
'dendrogram': False,
77+
}
8078
)
8179
profile.to_file("report.html")
8280
@@ -158,82 +156,56 @@ It's possible to disable certain groups of features through configuration shorth
158156
# Disable samples, correlations, missing diagrams and duplicates at once
159157
r = ProfileReport(samples=None, correlations=None, missing_diagrams=None, duplicates=None, interactions=None)
160158
161-
# Or use the .set_variable method
162-
r = ProfileReport()
163-
r.set_variable("samples", None)
164-
r.set_variable("duplicates", None)
165-
r.set_variable("correlations", None)
166-
r.set_variable("missing_diagrams", None)
167-
r.set_variable("interactions", None)
168-
169-
170-
171159
172160
Customise plots
173161
---------------
174-
175-
A way how to pass arguments to the underlying matplotlib is to use the ``plot`` argument. It is possible to change the default format of images to png (default svg) using the key-pair ``image_format: "png"`` and also the resolution of the image using ``dpi: 800``.
176-
162+
One way to pass arguments to the underlying matplotlib is to use the ``plot`` argument. It is possible to change the default image format to png (the default is svg) using the key-value pair ``image_format: "png"``, and to change the image resolution using ``dpi: 800``.
177163
An example would be:
178-
179164
.. code-block:: python
180165
181-
profile = ProfileReport(planets, title='Pandas Profiling Report', explorative=True,
182-
plot={
183-
'dpi':200,
184-
'image_format': 'png'
185-
})
186-
187-
188-
Furthermore, it is possible to change the default values of histograms, the options for that are the following:
189-
190-
histogram:
191-
x_axis_labels: True
192-
193-
# Number of bins (set to 0 to automatically detect the bin size)
194-
bins: 50
195-
196-
# Maximum number of bins (when bins=0)
197-
max_bins: 250
198-
199-
200-
201-
166+
profile = ProfileReport(
167+
planets,
168+
title='Pandas Profiling Report',
169+
explorative=True,
170+
plot={
171+
'dpi':200,
172+
'image_format': 'png'
173+
}
174+
)
202175
203176
Customise correlation matrix
204177
-----------------------------
178+
It's possible to directly access the correlation matrix as well. That is done with the ``plot`` argument and then with the ``correlation`` key. It is possible to customise the palette: one can use one of the following values used in seaborn or create `their own custom matplotlib palette <https://matplotlib.org/stable/gallery/color/custom_cmap.html>`_. Supported values are:
205179
206-
It's possible to directly access the correlation matrix as well. That is done with the ``plot`` argument and then with the `correlation` key. It is possible to customise the palett, one can use the following list used in seaborn or create [their own custom matplotlib palette](https://matplotlib.org/stable/gallery/color/custom_cmap.html). Supported values are
207-
208-
```
209180
'Accent', 'Accent_r', 'Blues', 'Blues_r', 'BrBG', 'BrBG_r', 'BuGn', 'BuGn_r', 'BuPu', 'BuPu_r', 'CMRmap', 'CMRmap_r', 'Dark2', 'Dark2_r', 'GnBu', 'GnBu_r', 'Greens', 'Greens_r', 'Greys', 'Greys_r', 'OrRd', 'OrRd_r', 'Oranges', 'Oranges_r', 'PRGn', 'PRGn_r', 'Paired', 'Paired_r', 'Pastel1', 'Pastel1_r', 'Pastel2', 'Pastel2_r', 'PiYG', 'PiYG_r', 'PuBu', 'PuBuGn', 'PuBuGn_r', 'PuBu_r', 'PuOr', 'PuOr_r', 'PuRd', 'PuRd_r', 'Purples', 'Purples_r', 'RdBu', 'RdBu_r', 'RdGy', 'RdGy_r', 'RdPu', 'RdPu_r', 'RdYlBu', 'RdYlBu_r', 'RdYlGn', 'RdYlGn_r', 'Reds', 'Reds_r', 'Set1', 'Set1_r', 'Set2', 'Set2_r', 'Set3', 'Set3_r', 'Spectral', 'Spectral_r', 'Wistia', 'Wistia_r', 'YlGn', 'YlGnBu', 'YlGnBu_r', 'YlGn_r', 'YlOrBr', 'YlOrBr_r', 'YlOrRd', 'YlOrRd_r', 'afmhot', 'afmhot_r', 'autumn', 'autumn_r', 'binary', 'binary_r', 'bone', 'bone_r', 'brg', 'brg_r', 'bwr', 'bwr_r', 'cividis', 'cividis_r', 'cool', 'cool_r', 'coolwarm', 'coolwarm_r', 'copper', 'copper_r', 'crest', 'crest_r', 'cubehelix', 'cubehelix_r', 'flag', 'flag_r', 'flare', 'flare_r', 'gist_earth', 'gist_earth_r', 'gist_gray', 'gist_gray_r', 'gist_heat', 'gist_heat_r', 'gist_ncar', 'gist_ncar_r', 'gist_rainbow', 'gist_rainbow_r', 'gist_stern', 'gist_stern_r', 'gist_yarg', 'gist_yarg_r', 'gnuplot', 'gnuplot2', 'gnuplot2_r', 'gnuplot_r', 'gray', 'gray_r', 'hot', 'hot_r', 'hsv', 'hsv_r', 'icefire', 'icefire_r', 'inferno', 'inferno_r', 'jet', 'jet_r', 'magma', 'magma_r', 'mako', 'mako_r', 'nipy_spectral', 'nipy_spectral_r', 'ocean', 'ocean_r', 'pink', 'pink_r', 'plasma', 'plasma_r', 'prism', 'prism_r', 'rainbow', 'rainbow_r', 'rocket', 'rocket_r', 'seismic', 'seismic_r', 'spring', 'spring_r', 'summer', 'summer_r', 'tab10', 'tab10_r', 'tab20', 'tab20_r', 'tab20b', 'tab20b_r', 'tab20c', 'tab20c_r', 'terrain', 'terrain_r', 'turbo', 'turbo_r', 'twilight', 'twilight_r', 'twilight_shifted', 'twilight_shifted_r', 'viridis', 'viridis_r', 'vlag', 'vlag_r', 'winter', 'winter_r'
210-
```
211181
212-
An example can be:
213182
183+
An example can be:
214184
.. code-block:: python
215-
216185
from pandas_profiling import ProfileReport
217-
218-
profile = ProfileReport(df, title='Pandas Profiling Report', explorative=True,
219-
plot={
220-
'correlation':{
221-
'cmap': 'RdBu_r',
222-
'bad': '#000000'}}
223-
)
224-
186+
profile = ProfileReport(
187+
df,
188+
title='Pandas Profiling Report',
189+
explorative=True,
190+
plot={
191+
'correlation':{
192+
'cmap': 'RdBu_r',
193+
'bad': '#000000'
194+
}
195+
}
196+
)
225197
226198
Similarly, one can change the palette for *Missing values* using the ``missing`` argument, e.g.:
227-
228199
.. code-block:: python
229-
230200
from pandas_profiling import ProfileReport
231201
232-
profile = ProfileReport(df, title='Pandas Profiling Report', explorative=True,
233-
plot={
234-
'missing':{
235-
'cmap': 'RdBu_r'}}
236-
)
237-
238-
239-
202+
profile = ProfileReport(
203+
df,
204+
title='Pandas Profiling Report',
205+
explorative=True,
206+
plot={
207+
'missing':{
208+
'cmap': 'RdBu_r'
209+
}
210+
}
211+
)

docsrc/source/pages/api.rst

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ API
55
.. toctree::
66

77
api/profile_report
8+
api/settings
89
api/controller
910
api/model
1011
api/report

docsrc/source/pages/api/settings.rst

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
========
2+
Settings
3+
========
4+
5+
.. currentmodule:: pandas_profiling.cfg
6+
.. toctree::
7+
8+
.. autosummary::
9+
:toctree: _autosummary
10+
11+
settings
12+
13+
14+
.. pydantic:: Settings

docsrc/source/pages/great_expectations_integration.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ Great Expectations then uses this statement to validate whether the column ``pas
1616

1717
Great Expectations renders Expectations to clean, human-readable documentation called *Data Docs*. These HTML docs contain both your Expectation Suites as well as your data validation results each time validation is run – think of it as a continuously updated data quality report.
1818

19-
For more information about Great Expectations, check out the `Great Expectations documentation <https://docs.greatexpectations.io/en/latest/>`_ and join the `Great Expectations Slack channel <https://www.greatexpectations.io/slack>` for help.
19+
For more information about Great Expectations, check out the `Great Expectations documentation <https://docs.greatexpectations.io/en/latest/>`_ and join the `Great Expectations Slack channel <https://www.greatexpectations.io/slack>`_ for help.
2020

2121

2222
Creating Expectation Suites with Pandas Profiling

docsrc/source/pages/metadata.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,8 @@ By default, the descriptions are presented in the overview tab and next to each
7272
7373
# We can disable showing the descriptions next to each variable
7474
report = df.profile_report(
75-
variable=dict(descriptions=definitions),
76-
show_variable_description=False
75+
variable=dict(descriptions=definitions),
76+
show_variable_description=False
7777
)
7878
7979
report.to_file('report.html')

examples/census/census.py

+9-11
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import pandas as pd
66

77
from pandas_profiling import ProfileReport
8+
from pandas_profiling.config import Dataset
89
from pandas_profiling.utils.cache import cache_file
910

1011
if __name__ == "__main__":
@@ -46,19 +47,16 @@
4647
with open("census_column_definition.json") as f:
4748
definitions = json.load(f)
4849

49-
profile.set_variable(
50-
"dataset",
51-
{
52-
"description": 'Predict whether income exceeds $50K/yr based on census data. Also known as "Census Income" dataset. Extraction was done by Barry Becker from the 1994 Census database. A set of reasonably clean records was extracted using the following conditions: ((AAGE>16) && (AGI>100) && (AFNLWGT>1)&& (HRSWK>0)). Prediction task is to determine whether a person makes over 50K a year.',
53-
"copyright_year": "1996",
54-
"author": "Ronny Kohavi and Barry Becker",
55-
"creator": "Barry Becker",
56-
"url": "https://archive.ics.uci.edu/ml/datasets/adult",
57-
},
50+
profile.config.dataset = Dataset(
51+
description='Predict whether income exceeds $50K/yr based on census data. Also known as "Census Income" dataset. Extraction was done by Barry Becker from the 1994 Census database. A set of reasonably clean records was extracted using the following conditions: ((AAGE>16) && (AGI>100) && (AFNLWGT>1)&& (HRSWK>0)). Prediction task is to determine whether a person makes over 50K a year.',
52+
copyright_year="1996",
53+
author="Ronny Kohavi and Barry Becker",
54+
creator="Barry Becker",
55+
url="https://archive.ics.uci.edu/ml/datasets/adult",
5856
)
59-
profile.set_variable("variables.descriptions", definitions)
57+
profile.config.variables.descriptions = definitions
6058

6159
# Only show the descriptions in the overview
62-
profile.set_variable("show_variable_description", False)
60+
profile.config.show_variable_description = False
6361

6462
profile.to_file(Path("./census_report.html"))

examples/features/images_cats_and_dogs.py

+9-11
Original file line numberDiff line numberDiff line change
@@ -40,23 +40,21 @@
4040
)
4141

4242
# Give our variable a description
43-
profile.set_variable(
44-
"variables.descriptions",
45-
{
46-
"files": "Paths linking to the cats and dogs found https://www.kaggle.com/tongpython/cat-and-dog."
47-
},
48-
)
43+
profile.config.variables.descriptions = {
44+
"files": "Paths linking to the cats and dogs found https://www.kaggle.com/tongpython/cat-and-dog."
45+
}
46+
4947
# If the number of samples is above this threshold, the scatter plots are replaced with hexbin plots
5048
# We are just over the threshold of 10.000 samples, so let's increase the limit.
51-
profile.set_variable("plot.scatter_threshold", 25000)
49+
profile.config.plot.scatter_threshold = 25000
5250

5351
# Enable files and images (off by default, as it uses relatively expensive computations when not interested)
54-
profile.set_variable("vars.path.active", True)
55-
profile.set_variable("vars.file.active", True)
56-
profile.set_variable("vars.image.active", True)
52+
profile.config.vars.path.active = True
53+
profile.config.vars.file.active = True
54+
profile.config.vars.image.active = True
5755

5856
# No exif found, so turn off expensive computation
59-
profile.set_variable("vars.image.exif", False)
57+
profile.config.vars.image.exif = False
6058

6159
# Save the report to a file
6260
profile.to_file("cats-and-dogs.html")

examples/features/images_exif.py

+3-6
Original file line numberDiff line numberDiff line change
@@ -43,12 +43,9 @@
4343
explorative=True,
4444
)
4545
# We can also configure the report like this
46-
profile.set_variable(
47-
"variables.descriptions",
48-
{
49-
"files": "The 5 Celebrity Faces Dataset found on Kaggle (dansbecker/5-celebrity-faces-dataset)."
50-
},
51-
)
46+
profile.config.variables.descriptions = {
47+
"files": "The 5 Celebrity Faces Dataset found on Kaggle (dansbecker/5-celebrity-faces-dataset)."
48+
}
5249

5350
# Save the report
5451
profile.to_file("celebrity-faces.html")

requirements.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@ joblib
22
scipy>=1.4.1
33
pandas>=0.25.3,!=1.0.1,!=1.0.0,!=1.0.2,!=1.1.0
44
matplotlib>=3.2.0
5-
confuse>=1.0.0
5+
pydantic>=1.8.1
6+
PyYAML>=5.0.0
67
jinja2>=2.11.1
78
visions[type_image_path]==0.7.1
89
numpy>=1.16.0
9-
attrs>=19.3.0
1010
# Could be optional
1111
# Related to HTML report
1212
htmlmin>=0.1.12

src/pandas_profiling/__init__.py

-6
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,12 @@
33
.. include:: ../../README.md
44
"""
55

6-
from pandas_profiling.config import Config, config
76
from pandas_profiling.controller import pandas_decorator
87
from pandas_profiling.profile_report import ProfileReport
98
from pandas_profiling.version import __version__
109

11-
clear_config = ProfileReport.clear_config
12-
1310
__all__ = [
14-
"Config",
15-
"config",
1611
"pandas_decorator",
1712
"ProfileReport",
1813
"__version__",
19-
"clear_config",
2014
]

0 commit comments

Comments
 (0)