Series-level Exploration Examples
Simple examples of series exploration methods.
Used Libraries
The following methods selectively use these libraries:
import plotly.io as pio
pio.renderers.default = "notebook"
Basic Info Method
info() - Shows summary statistics for the series.
For a numeric column.
from frameon import load_dataset, FrameOn as fo
titanic = fo(load_dataset('titanic'))
titanic['age'].explore.info(hist_mode='dual_hist_qq')
| Summary | | Percentiles | | Detailed Stats | | Value Counts | |
|---|---|---|---|---|---|---|---|
| Total | 714 (80%) | Max | 80 | Mean | 29.70 | 24 | 30 (3%) |
| Missing | 177 (20%) | 99% | 65.87 | Trimmed Mean (10%) | 29.27 | 22 | 27 (3%) |
| Distinct | 88 (10%) | 95% | 56 | Mode | 24 | 18 | 26 (3%) |
| Non-Duplicate | 16 (2%) | 75% | 38 | Range | 79.58 | 28 | 25 (3%) |
| Duplicates | 802 (90%) | 50% | 28 | IQR | 17.88 | 30 | 25 (3%) |
| Dup. Values | 72 (8%) | 25% | 20.12 | Std | 14.53 | 19 | 25 (3%) |
| Zeros | --- | 5% | 4 | MAD | 13.34 | 21 | 24 (3%) |
| Negative | --- | 1% | 1 | Kurt | 0.18 | 25 | 23 (3%) |
| Memory Usage | <1 Mb | Min | 0.42 | Skew | 0.39 | 36 | 22 (2%) |
---------------------------------------------------------------------------
ChromeNotFoundError Traceback (most recent call last)
ChromeNotFoundError:
The above exception was the direct cause of the following exception:
ChromeNotFoundError Traceback (most recent call last)
File ~/checkouts/readthedocs.org/user_builds/frameon/envs/v0.1.0/lib/python3.13/site-packages/plotly/io/_kaleido.py:380, in to_image(fig, format, width, height, scale, validate, engine)
379 # TODO: Refactor to make it possible to use a shared Kaleido instance here
--> 380 img_bytes = kaleido.calc_fig_sync(
381 fig_dict,
382 opts=dict(
383 format=format or defaults.default_format,
384 width=width or defaults.default_width,
385 height=height or defaults.default_height,
386 scale=scale or defaults.default_scale,
387 ),
388 topojson=defaults.topojson,
389 kopts=kopts,
390 )
391 except ChromeNotFoundError:
File ~/checkouts/readthedocs.org/user_builds/frameon/envs/v0.1.0/lib/python3.13/site-packages/kaleido/__init__.py:145, in calc_fig_sync(*args, **kwargs)
144 """Call `calc_fig` but blocking."""
--> 145 return _async_thread_run(calc_fig, args=args, kwargs=kwargs)
File ~/checkouts/readthedocs.org/user_builds/frameon/envs/v0.1.0/lib/python3.13/site-packages/kaleido/__init__.py:138, in _async_thread_run(func, args, kwargs)
137 if isinstance(res, BaseException):
--> 138 raise res
139 else:
File ~/checkouts/readthedocs.org/user_builds/frameon/envs/v0.1.0/lib/python3.13/site-packages/kaleido/__init__.py:129, in _async_thread_run.<locals>.run(*args, **kwargs)
128 try:
--> 129 q.put(asyncio.run(func(*args, **kwargs)))
130 except BaseException as e: # noqa: BLE001
File ~/.asdf/installs/python/3.13.3/lib/python3.13/asyncio/runners.py:195, in run(main, debug, loop_factory)
194 with Runner(debug=debug, loop_factory=loop_factory) as runner:
--> 195 return runner.run(main)
File ~/.asdf/installs/python/3.13.3/lib/python3.13/asyncio/runners.py:118, in Runner.run(self, coro, context)
117 try:
--> 118 return self._loop.run_until_complete(task)
119 except exceptions.CancelledError:
File ~/.asdf/installs/python/3.13.3/lib/python3.13/asyncio/base_events.py:719, in BaseEventLoop.run_until_complete(self, future)
717 raise RuntimeError('Event loop stopped before Future completed.')
--> 719 return future.result()
File ~/checkouts/readthedocs.org/user_builds/frameon/envs/v0.1.0/lib/python3.13/site-packages/kaleido/__init__.py:54, in calc_fig(fig, path, opts, topojson, kopts)
53 kopts["n"] = 1
---> 54 async with Kaleido(**kopts) as k:
55 return await k.calc_fig(
56 fig,
57 path=path,
58 opts=opts,
59 topojson=topojson,
60 )
File ~/checkouts/readthedocs.org/user_builds/frameon/envs/v0.1.0/lib/python3.13/site-packages/kaleido/kaleido.py:128, in Kaleido.__init__(self, *args, **kwargs)
127 except ChromeNotFoundError:
--> 128 raise ChromeNotFoundError(
129 "Kaleido v1 and later requires Chrome to be installed. "
130 "To install Chrome, use the CLI command `kaleido_get_chrome`, "
131 "or from Python, use either `kaleido.get_chrome()` "
132 "or `kaleido.get_chrome_sync()`.",
133 ) from ChromeNotFoundError
135 if page and isinstance(page, str) and Path(page).is_file():
ChromeNotFoundError: Kaleido v1 and later requires Chrome to be installed. To install Chrome, use the CLI command `kaleido_get_chrome`, or from Python, use either `kaleido.get_chrome()` or `kaleido.get_chrome_sync()`.
During handling of the above exception, another exception occurred:
RuntimeError Traceback (most recent call last)
Cell In[2], line 4
1 from frameon import load_dataset, FrameOn as fo
3 titanic = fo(load_dataset('titanic'))
----> 4 titanic['age'].explore.info(hist_mode='dual_hist_qq')
File ~/checkouts/readthedocs.org/user_builds/frameon/envs/v0.1.0/lib/python3.13/site-packages/frameon/series/explore/info.py:136, in SeriesOnInfo.info(self, plot, column_type, hist_mode, lower_quantile, upper_quantile, top_n, max_words, height, width, labels, title, show_text, xaxis_type, yaxis_type, renderer, **plotly_kwargs)
134 if hist_mode not in ['base', 'dual_hist_trim', 'dual_hist_qq']:
135 raise ValueError("hist_mode must be on of 'base', dual_hist_trim' or 'dual_hist_qq'")
--> 136 return self._generate_histogram(hist_mode, lower_quantile, upper_quantile, title, height, width, labels, xaxis_type, yaxis_type, plotly_kwargs)
138 if column_type != 'datetime' and (column_type in ['text', 'categorical'] or pd.api.types.is_string_dtype(self._series.dropna())):
139 if pd.api.types.is_datetime64_any_dtype(self._series):
File ~/checkouts/readthedocs.org/user_builds/frameon/envs/v0.1.0/lib/python3.13/site-packages/frameon/series/explore/info.py:553, in SeriesOnInfo._generate_histogram(self, hist_mode, lower_quantile, upper_quantile, title, height, width, labels, xaxis_type, yaxis_type, plotly_kwargs)
536 labels.setdefault(self._series.name, 'Value')
537 params = dict(
538 x=self._series,
539 mode=hist_mode,
(...) 551 # height=config['sizes']['height'],
552 )
--> 553 fig = builder.build(**params)
554 if annotations and hist_mode == 'dual_hist_trim':
555 fig.update_layout(annotations=annotations)
File ~/checkouts/readthedocs.org/user_builds/frameon/envs/v0.1.0/lib/python3.13/site-packages/frameon/utils/plotting/base_histogram.py:92, in HistogramBuilder.build(self, **kwargs)
89 self._apply_final_styling()
91 if self.config.renderer is not None:
---> 92 self.figure.show(config=dict(dpi=200), renderer=self.config.renderer, height=self.figure_height, width=self.figure_width)
93 else:
94 return self.figure
File ~/checkouts/readthedocs.org/user_builds/frameon/envs/v0.1.0/lib/python3.13/site-packages/plotly/basedatatypes.py:3420, in BaseFigure.show(self, *args, **kwargs)
3387 """
3388 Show a figure using either the default renderer(s) or the renderer(s)
3389 specified by the renderer argument
(...) 3416 None
3417 """
3418 import plotly.io as pio
-> 3420 return pio.show(self, *args, **kwargs)
File ~/checkouts/readthedocs.org/user_builds/frameon/envs/v0.1.0/lib/python3.13/site-packages/plotly/io/_renderers.py:407, in show(fig, renderer, validate, **kwargs)
404 fig_dict = validate_coerce_fig_to_dict(fig, validate)
406 # Mimetype renderers
--> 407 bundle = renderers._build_mime_bundle(fig_dict, renderers_string=renderer, **kwargs)
408 if bundle:
409 if not ipython_display:
File ~/checkouts/readthedocs.org/user_builds/frameon/envs/v0.1.0/lib/python3.13/site-packages/plotly/io/_renderers.py:315, in RenderersConfig._build_mime_bundle(self, fig_dict, renderers_string, **kwargs)
312 if hasattr(renderer, k):
313 setattr(renderer, k, v)
--> 315 bundle.update(renderer.to_mimebundle(fig_dict))
317 return bundle
File ~/checkouts/readthedocs.org/user_builds/frameon/envs/v0.1.0/lib/python3.13/site-packages/plotly/io/_base_renderers.py:123, in ImageRenderer.to_mimebundle(self, fig_dict)
122 def to_mimebundle(self, fig_dict):
--> 123 image_bytes = to_image(
124 fig_dict,
125 format=self.format,
126 width=self.width,
127 height=self.height,
128 scale=self.scale,
129 validate=False,
130 engine=self.engine,
131 )
133 if self.b64_encode:
134 image_str = base64.b64encode(image_bytes).decode("utf8")
File ~/checkouts/readthedocs.org/user_builds/frameon/envs/v0.1.0/lib/python3.13/site-packages/plotly/io/_kaleido.py:392, in to_image(fig, format, width, height, scale, validate, engine)
380 img_bytes = kaleido.calc_fig_sync(
381 fig_dict,
382 opts=dict(
(...) 389 kopts=kopts,
390 )
391 except ChromeNotFoundError:
--> 392 raise RuntimeError(PLOTLY_GET_CHROME_ERROR_MSG)
394 else:
395 # Kaleido v0
396 if ENABLE_KALEIDO_V0_DEPRECATION_WARNINGS:
RuntimeError:
Kaleido requires Google Chrome to be installed.
Either download and install Chrome yourself following Google's instructions for your operating system,
or install it from your terminal by running:
$ plotly_get_chrome
For a categorical column.
from frameon import load_dataset, FrameOn as fo
titanic = fo(load_dataset('titanic'))
titanic['class'].explore.info()
For a datetime column.
from frameon import load_dataset, FrameOn as fo
superstore = fo(load_dataset('superstore'))
superstore['Order Date'].explore.info()
For a text column.
from frameon import load_dataset, FrameOn as fo
reviews = fo(load_dataset('reviews'))
reviews['Text'].explore.info()
Detect Anomalies
detect_anomalies() - Detects anomalies in the series using the specified method.
Returns a boolean mask where True indicates an anomaly in the series.
from frameon import load_dataset, FrameOn as fo
titanic = fo(load_dataset('titanic'))
mask = titanic['age'].explore.detect_anomalies(
anomaly_type='missing'
)
mask.head()
Detect Outliers
detect_outliers() - Detects outliers in the series using statistical and machine learning methods.
from frameon import load_dataset, FrameOn as fo
tips = fo(load_dataset('tips'))
tips['total_bill'].explore.detect_outliers(
method='quantile',
threshold=0.05
)
Anomalies by Categories
anomalies_by_categories() - Analyzes the anomaly distribution across all categorical columns in the parent DataFrame.
from frameon import load_dataset, FrameOn as fo
tips = fo(load_dataset('tips'))
tips['total_bill'].explore.anomalies_by_categories(
anomaly_type='outlier',
method='quantile',
threshold=0.05
)
Anomalies Over Time
anomalies_over_time() - Plots anomalies over time using resampling.
from frameon import load_dataset, FrameOn as fo
taxis = fo(load_dataset('taxis'))
fig = taxis['payment'].explore.anomalies_over_time(
anomaly_type='missing',
time_column='pickup',
freq='1D'
)
fig.show()
Detect Window Outliers
detect_window_outliers() - Detects and analyzes outliers in rolling windows of time series data.
from frameon import load_dataset, FrameOn as fo
superstore = fo(load_dataset('superstore'))
superstore['Sales'].explore.detect_window_outliers(
time_column='Order Date'
, window=10
, resample_freq='W'
, agg_func='mean'
, method='confidence'
, threshold=0.05
)
Plot Rolling Anomaly Rate
plot_rolling_anomaly_rate() - Calculates and visualizes the rolling rate of the specified anomalies in a time series.
from frameon import load_dataset, FrameOn as fo
taxis = fo(load_dataset('taxis'))
taxis['payment'].explore.plot_rolling_anomaly_rate(
anomaly_type='missing',
time_column='pickup',
window=30,
)