Series-level Exploration Examples#

Simple examples of series exploration methods.

Used Libraries#

The following methods selectively use these libraries:

import plotly.io as pio
pio.renderers.default = "notebook"

Basic Info Method#

info() - Shows summary statistics for the series.

For a numeric column.

from frameon import load_dataset, FrameOn as fo

titanic = fo(load_dataset('titanic'))
titanic['age'].explore.info(hist_mode='dual_hist_qq')
Summary Statistics for "age" (Type: Float)
Summary Percentiles Detailed Stats Value Counts
Total 714 (80%) Max 80 Mean 29.70 24 30 (3%)
Missing 177 (20%) 99% 65.87 Trimmed Mean (10%) 29.27 22 27 (3%)
Distinct 88 (10%) 95% 56 Mode 24 18 26 (3%)
Non-Duplicate 16 (2%) 75% 38 Range 79.58 28 25 (3%)
Duplicates 802 (90%) 50% 28 IQR 17.88 30 25 (3%)
Dup. Values 72 (8%) 25% 20.12 Std 14.53 19 25 (3%)
Zeros --- 5% 4 MAD 13.34 21 24 (3%)
Negative --- 1% 1 Kurt 0.18 25 23 (3%)
Memory Usage <1 Mb Min 0.42 Skew 0.39 36 22 (2%)
---------------------------------------------------------------------------
ChromeNotFoundError                       Traceback (most recent call last)
ChromeNotFoundError: 

The above exception was the direct cause of the following exception:

ChromeNotFoundError                       Traceback (most recent call last)
File ~/checkouts/readthedocs.org/user_builds/frameon/envs/v0.1.0/lib/python3.13/site-packages/plotly/io/_kaleido.py:380, in to_image(fig, format, width, height, scale, validate, engine)
    379     # TODO: Refactor to make it possible to use a shared Kaleido instance here
--> 380     img_bytes = kaleido.calc_fig_sync(
    381         fig_dict,
    382         opts=dict(
    383             format=format or defaults.default_format,
    384             width=width or defaults.default_width,
    385             height=height or defaults.default_height,
    386             scale=scale or defaults.default_scale,
    387         ),
    388         topojson=defaults.topojson,
    389         kopts=kopts,
    390     )
    391 except ChromeNotFoundError:

File ~/checkouts/readthedocs.org/user_builds/frameon/envs/v0.1.0/lib/python3.13/site-packages/kaleido/__init__.py:145, in calc_fig_sync(*args, **kwargs)
    144 """Call `calc_fig` but blocking."""
--> 145 return _async_thread_run(calc_fig, args=args, kwargs=kwargs)

File ~/checkouts/readthedocs.org/user_builds/frameon/envs/v0.1.0/lib/python3.13/site-packages/kaleido/__init__.py:138, in _async_thread_run(func, args, kwargs)
    137 if isinstance(res, BaseException):
--> 138     raise res
    139 else:

File ~/checkouts/readthedocs.org/user_builds/frameon/envs/v0.1.0/lib/python3.13/site-packages/kaleido/__init__.py:129, in _async_thread_run.<locals>.run(*args, **kwargs)
    128 try:
--> 129     q.put(asyncio.run(func(*args, **kwargs)))
    130 except BaseException as e:  # noqa: BLE001

File ~/.asdf/installs/python/3.13.3/lib/python3.13/asyncio/runners.py:195, in run(main, debug, loop_factory)
    194 with Runner(debug=debug, loop_factory=loop_factory) as runner:
--> 195     return runner.run(main)

File ~/.asdf/installs/python/3.13.3/lib/python3.13/asyncio/runners.py:118, in Runner.run(self, coro, context)
    117 try:
--> 118     return self._loop.run_until_complete(task)
    119 except exceptions.CancelledError:

File ~/.asdf/installs/python/3.13.3/lib/python3.13/asyncio/base_events.py:719, in BaseEventLoop.run_until_complete(self, future)
    717     raise RuntimeError('Event loop stopped before Future completed.')
--> 719 return future.result()

File ~/checkouts/readthedocs.org/user_builds/frameon/envs/v0.1.0/lib/python3.13/site-packages/kaleido/__init__.py:54, in calc_fig(fig, path, opts, topojson, kopts)
     53 kopts["n"] = 1
---> 54 async with Kaleido(**kopts) as k:
     55     return await k.calc_fig(
     56         fig,
     57         path=path,
     58         opts=opts,
     59         topojson=topojson,
     60     )

File ~/checkouts/readthedocs.org/user_builds/frameon/envs/v0.1.0/lib/python3.13/site-packages/kaleido/kaleido.py:128, in Kaleido.__init__(self, *args, **kwargs)
    127 except ChromeNotFoundError:
--> 128     raise ChromeNotFoundError(
    129         "Kaleido v1 and later requires Chrome to be installed. "
    130         "To install Chrome, use the CLI command `kaleido_get_chrome`, "
    131         "or from Python, use either `kaleido.get_chrome()` "
    132         "or `kaleido.get_chrome_sync()`.",
    133     ) from ChromeNotFoundError
    135 if page and isinstance(page, str) and Path(page).is_file():

ChromeNotFoundError: Kaleido v1 and later requires Chrome to be installed. To install Chrome, use the CLI command `kaleido_get_chrome`, or from Python, use either `kaleido.get_chrome()` or `kaleido.get_chrome_sync()`.

During handling of the above exception, another exception occurred:

RuntimeError                              Traceback (most recent call last)
Cell In[2], line 4
      1 from frameon import load_dataset, FrameOn as fo
      3 titanic = fo(load_dataset('titanic'))
----> 4 titanic['age'].explore.info(hist_mode='dual_hist_qq')

File ~/checkouts/readthedocs.org/user_builds/frameon/envs/v0.1.0/lib/python3.13/site-packages/frameon/series/explore/info.py:136, in SeriesOnInfo.info(self, plot, column_type, hist_mode, lower_quantile, upper_quantile, top_n, max_words, height, width, labels, title, show_text, xaxis_type, yaxis_type, renderer, **plotly_kwargs)
    134         if hist_mode not in ['base', 'dual_hist_trim', 'dual_hist_qq']:
    135             raise ValueError("hist_mode must be on of 'base', dual_hist_trim' or 'dual_hist_qq'")
--> 136         return self._generate_histogram(hist_mode, lower_quantile, upper_quantile, title, height, width, labels, xaxis_type, yaxis_type, plotly_kwargs)
    138 if column_type != 'datetime' and (column_type in ['text', 'categorical'] or pd.api.types.is_string_dtype(self._series.dropna())):
    139     if pd.api.types.is_datetime64_any_dtype(self._series):

File ~/checkouts/readthedocs.org/user_builds/frameon/envs/v0.1.0/lib/python3.13/site-packages/frameon/series/explore/info.py:553, in SeriesOnInfo._generate_histogram(self, hist_mode, lower_quantile, upper_quantile, title, height, width, labels, xaxis_type, yaxis_type, plotly_kwargs)
    536 labels.setdefault(self._series.name, 'Value')
    537 params = dict(
    538     x=self._series,
    539     mode=hist_mode,
   (...)    551     # height=config['sizes']['height'],        
    552 )
--> 553 fig = builder.build(**params)
    554 if annotations and hist_mode == 'dual_hist_trim':
    555     fig.update_layout(annotations=annotations)

File ~/checkouts/readthedocs.org/user_builds/frameon/envs/v0.1.0/lib/python3.13/site-packages/frameon/utils/plotting/base_histogram.py:92, in HistogramBuilder.build(self, **kwargs)
     89 self._apply_final_styling()
     91 if self.config.renderer is not None:
---> 92     self.figure.show(config=dict(dpi=200), renderer=self.config.renderer, height=self.figure_height, width=self.figure_width)
     93 else:
     94     return self.figure

File ~/checkouts/readthedocs.org/user_builds/frameon/envs/v0.1.0/lib/python3.13/site-packages/plotly/basedatatypes.py:3420, in BaseFigure.show(self, *args, **kwargs)
   3387 """
   3388 Show a figure using either the default renderer(s) or the renderer(s)
   3389 specified by the renderer argument
   (...)   3416 None
   3417 """
   3418 import plotly.io as pio
-> 3420 return pio.show(self, *args, **kwargs)

File ~/checkouts/readthedocs.org/user_builds/frameon/envs/v0.1.0/lib/python3.13/site-packages/plotly/io/_renderers.py:407, in show(fig, renderer, validate, **kwargs)
    404 fig_dict = validate_coerce_fig_to_dict(fig, validate)
    406 # Mimetype renderers
--> 407 bundle = renderers._build_mime_bundle(fig_dict, renderers_string=renderer, **kwargs)
    408 if bundle:
    409     if not ipython_display:

File ~/checkouts/readthedocs.org/user_builds/frameon/envs/v0.1.0/lib/python3.13/site-packages/plotly/io/_renderers.py:315, in RenderersConfig._build_mime_bundle(self, fig_dict, renderers_string, **kwargs)
    312             if hasattr(renderer, k):
    313                 setattr(renderer, k, v)
--> 315         bundle.update(renderer.to_mimebundle(fig_dict))
    317 return bundle

File ~/checkouts/readthedocs.org/user_builds/frameon/envs/v0.1.0/lib/python3.13/site-packages/plotly/io/_base_renderers.py:123, in ImageRenderer.to_mimebundle(self, fig_dict)
    122 def to_mimebundle(self, fig_dict):
--> 123     image_bytes = to_image(
    124         fig_dict,
    125         format=self.format,
    126         width=self.width,
    127         height=self.height,
    128         scale=self.scale,
    129         validate=False,
    130         engine=self.engine,
    131     )
    133     if self.b64_encode:
    134         image_str = base64.b64encode(image_bytes).decode("utf8")

File ~/checkouts/readthedocs.org/user_builds/frameon/envs/v0.1.0/lib/python3.13/site-packages/plotly/io/_kaleido.py:392, in to_image(fig, format, width, height, scale, validate, engine)
    380         img_bytes = kaleido.calc_fig_sync(
    381             fig_dict,
    382             opts=dict(
   (...)    389             kopts=kopts,
    390         )
    391     except ChromeNotFoundError:
--> 392         raise RuntimeError(PLOTLY_GET_CHROME_ERROR_MSG)
    394 else:
    395     # Kaleido v0
    396     if ENABLE_KALEIDO_V0_DEPRECATION_WARNINGS:

RuntimeError: 

Kaleido requires Google Chrome to be installed.

Either download and install Chrome yourself following Google's instructions for your operating system,
or install it from your terminal by running:

    $ plotly_get_chrome

For a categorical column.

from frameon import load_dataset, FrameOn as fo

titanic = fo(load_dataset('titanic'))
titanic['class'].explore.info()

For a datetime column.

from frameon import load_dataset, FrameOn as fo

superstore = fo(load_dataset('superstore'))
superstore['Order Date'].explore.info()

For a text column.

from frameon import load_dataset, FrameOn as fo

reviews = fo(load_dataset('reviews'))
reviews['Text'].explore.info()

Detect Anomalies#

detect_anomalies() - Detects anomalies in the series using the specified method.

Returns a boolean mask where True indicates an anomaly in the series.

from frameon import load_dataset, FrameOn as fo

titanic = fo(load_dataset('titanic'))
mask = titanic['age'].explore.detect_anomalies(
    anomaly_type='missing'
)
mask.head()

Detect Outliers#

detect_outliers() - Detects outliers in the series using statistical and machine learning methods.

from frameon import load_dataset, FrameOn as fo

tips = fo(load_dataset('tips'))
tips['total_bill'].explore.detect_outliers(
    method='quantile',
    threshold=0.05
)

Anomalies by Categories#

anomalies_by_categories() - Analyzes the anomaly distribution across all categorical columns in the parent DataFrame.

from frameon import load_dataset, FrameOn as fo

tips = fo(load_dataset('tips'))
tips['total_bill'].explore.anomalies_by_categories(
    anomaly_type='outlier',
    method='quantile',
    threshold=0.05
)

Anomalies Over Time#

anomalies_over_time() - Plot anomalies over time using resampling.

from frameon import load_dataset, FrameOn as fo

taxis = fo(load_dataset('taxis'))
fig = taxis['payment'].explore.anomalies_over_time(
    anomaly_type='missing',
    time_column='pickup',
    freq='1D'
)
fig.show()

Detect Window Outliers#

detect_window_outliers() - Detect and analyze outliers in rolling windows of time series data.

from frameon import load_dataset, FrameOn as fo

superstore = fo(load_dataset('superstore'))
superstore['Sales'].explore.detect_window_outliers(
    time_column='Order Date'
    , window=10
    , resample_freq='W'
    , agg_func='mean'
    , method='confidence'
    , threshold=0.05
)

Plot Rolling Anomaly Rate#

plot_rolling_anomaly_rate() - Calculate and visualize the rolling rate of specified anomalies in a time series.

from frameon import load_dataset, FrameOn as fo

taxis = fo(load_dataset('taxis'))
taxis['payment'].explore.plot_rolling_anomaly_rate(
    anomaly_type='missing',
    time_column='pickup',
    window=30, 
)