Commit cc58e1cf authored by julian.gethmann's avatar julian.gethmann

Add option `save_local` to :func:`pvs2pd`

* `save_local` saves the JSON files to the current directory, so one
has a backup of the JSON entries used in further processing.
This is intended for reproducibility, backup and offline
working purposes.
parent 7a00bd4d
......@@ -3,6 +3,10 @@ Changelog
=========
* [TODO] Add the raw PV name option to the command line interface
0.5.7 (2017-09-27)
------------------
* Add option `save_local` to :func:`pvs2pd`
0.5.6 (2017-09-21)
------------------
* Add option `upsample` to :func:`pvs2pd`
......
......@@ -11,9 +11,9 @@ from .cassandra import Cassandra
from .cassandra import Pvs
def pvs2pd(start, end, pv_names, count=None, upsample=None):
def pvs2pd(start, end, pv_names, count=None, upsample=None, save_local=False):
# type: (datetime.datetime, datetime.datetime, Iterable[str],
# Optional[int], Optional[str]) -> pd.DataFrame
# Optional[int], Optional[str], Optional[bool]) -> pd.DataFrame
"""Return a `pd.DataFrame` with data for all `pv_names` and `time` as index
Missing data points are filled with the last value (like in CSS).
......@@ -38,6 +38,8 @@ def pvs2pd(start, end, pv_names, count=None, upsample=None):
`offset-alias <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`_
then it is upsampled with this frequency, e.g. `upsample="ms"`
returns a DataFrame with equally spaced data points 1 millisecond apart.
save_local (bool): defaults to False. If True, the Cassandra data sets
are saved locally as JSON files in the current working directory.
Returns:
pd.DataFrame: "time" as index and `pv_names` as columns.
......@@ -52,14 +54,23 @@ def pvs2pd(start, end, pv_names, count=None, upsample=None):
.. versionchanged:: 0.5.6
Added argument `upsample`
.. versionchanged:: 0.5.7
Added argument `save_local`
"""
collected_data = pd.DataFrame({"time": [start, end]}).set_index("time")
for pv_name in pv_names:
pv = Pvs.pv[pv_name] if ":" not in pv_name else pv_name
with Cassandra(start, end, pv=pv) as cas:
if save_local:
cas = Cassandra(start, end, pv=pv, count=None, directory=".")
cas.get_json_local()
collected_data = collected_data.join(pd.DataFrame(
{"time": cas[0], pv_name: cas[1]}
).set_index("time"), how="outer")
else:
with Cassandra(start, end, pv=pv) as cas:
collected_data = collected_data.join(pd.DataFrame(
{"time": cas[0], pv_name: cas[1]}
).set_index("time"), how="outer")
collected_data = collected_data.ffill()
collected_data = collected_data[collected_data.first_valid_index():]
# if start in collected_data.index:
......
......@@ -3,7 +3,7 @@
from setuptools import find_packages
from setuptools import setup
__version__ = '0.5.6'
__version__ = '0.5.7'
def readme():
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# :title: Unittests for the cassandra package (cassandra.pandas.pvs2pd)
# :lastupdate: 2016-09-15
import datetime
import doctest
import unittest
import cassandra
from cassandra.pandas import pvs2pd
class CassandraTestCase(unittest.TestCase):
    """Tests for :func:`cassandra.pandas.pvs2pd`.

    NOTE(review): the tests query the PVs ``nu_x`` and ``nu_y`` for a fixed
    time window and compare against recorded mean values, so they presumably
    need access to the archived data -- confirm before running offline.
    """

    def setUp(self):
        """Define the PV names and the 15 second time window for the tests."""
        self.pv_list = ["nu_x", "nu_y"]
        self.start = datetime.datetime(2016, 8, 5, 18, 28, 36)
        self.end = self.start + datetime.timedelta(seconds=15)

    def _assert_column_means(self, frame, expected):
        """Check the column means of `frame` against `expected`.

        The means are floating point results, so they are compared with a
        tolerance instead of exact equality; exact comparison breaks on
        harmless differences in summation order or platform rounding.

        Args:
            frame: object returned by `pvs2pd` (its ``mean()`` is iterated).
            expected (list): expected mean for each column, in column order.
        """
        means = list(frame.mean())
        # Compare the lengths first: ``zip`` would silently drop extra values.
        self.assertEqual(len(means), len(expected))
        for actual, wanted in zip(means, expected):
            self.assertAlmostEqual(actual, wanted, places=10)

    def test_pvs2pd(self):
        """`pvs2pd` returns the recorded means and rejects a bad `upsample`."""
        ret = pvs2pd(self.start, self.end, self.pv_list)
        self._assert_column_means(
            ret, [0.81171229137420042, 0.72489568710029628])

        ret = pvs2pd(self.start, self.end, self.pv_list, upsample="ms")
        self._assert_column_means(
            ret, [0.81168935758343974, 0.72495481466421241])

        # `upsample` has to be an offset alias string, not a boolean.
        with self.assertRaises(ValueError):
            pvs2pd(self.start, self.end, self.pv_list, upsample=True)
if __name__ == "__main__":
    # The suite is assembled by hand (instead of calling ``unittest.main()``)
    # so that the doctests of ``cassandra.cassandra`` run together with the
    # unit tests in one verbose report.
    testSuite = unittest.TestSuite()
    # ``unittest.makeSuite`` is deprecated and was removed in Python 3.13;
    # the loader method below is the documented replacement.
    testSuite.addTest(
        unittest.defaultTestLoader.loadTestsFromTestCase(CassandraTestCase))
    testSuite.addTest(doctest.DocTestSuite(cassandra.cassandra))
    unittest.TextTestRunner(verbosity=2).run(testSuite)
# vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment