https://pandas.pydata.org/docs/user_guide/timeseries.html
https://pandas.pydata.org/docs/reference/api/pandas.to_datetime.html
https://pandas.pydata.org/docs/reference/api/pandas.date_range.html
https://pandas.pydata.org/docs/user_guide/timedeltas.html
https://pandas.pydata.org/docs/reference/api/pandas.to_timedelta.html
pandas provides extensive capabilities and features for working with time series data across all domains.
# Different objects for manipulating dates and times in Python:
#   datetime.date()
#   datetime.time()
#   datetime.datetime()
#   datetime.timedelta()
#   numpy.datetime64()
#   numpy.timedelta64()
#   pandas.Timestamp()       # numpy.datetime64 inside
#   pandas.Timedelta()
#   pandas.DatetimeIndex()   # array of numpy.datetime64 inside
#   pandas.TimedeltaIndex()
# Ways to construct a pandas.Timestamp (the expected repr follows each call).
pd.Timestamp('2017-01-01T12')  # a datetime-like string
# Timestamp('2017-01-01 12:00:00')
pd.Timestamp(1513393355.5, unit='s')  # a float Unix epoch, in units of seconds
# Timestamp('2017-12-16 03:02:35.500000')
pd.Timestamp(1513393355.5)  # without `unit` the number is read as nanoseconds
# Timestamp('1970-01-01 00:00:01.513393355')
pd.Timestamp(2017, 1, 15, 12)  # positional fields, the API of datetime.datetime
# Timestamp('2017-01-15 12:00:00')
pd.Timestamp(year=2017, month=1, day=15, hour=12)  # the same, by keyword
# Timestamp('2017-01-15 12:00:00')
# Common Timestamp attributes and methods.
# The example outputs below were recorded from a single run of now().
t = pd.Timestamp.now()  # Timestamp('2023-05-27 11:34:55.950145')
t.isoformat()           # '2023-05-27T11:34:55.950145'
t.isoformat(sep=' ')    # '2023-05-27 11:34:55.950145'
t.floor('D')            # Timestamp('2023-05-27 00:00:00')
t.ceil('D')             # Timestamp('2023-05-28 00:00:00')
t.days_in_month         # 31
t.day_name()            # 'Saturday'
t.month_name()          # 'May'
t.date()                # datetime.date(2023, 5, 27)
# pandas.to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
#                    utc=None, format=None, exact=True, unit=None,
#                    infer_datetime_format=False, origin='unix', cache=True)
# Convert argument to datetime.
# arg : int, float, str, datetime, list, tuple, 1-d array, Series, DataFrame/dict-like
# errors : {'ignore', 'raise', 'coerce'}, default 'raise'
#          ('ignore' is deprecated since pandas 2.2)
# infer_datetime_format is deprecated since pandas 2.0.
# Return type depends on input:
#   list-like: DatetimeIndex
#   Series: Series of datetime64 dtype
#   scalar: Timestamp
# Parsing time series information from various sources and formats.
import datetime

# A single list may mix strings, numpy.datetime64 and datetime.datetime values.
pd.to_datetime(['20200102', np.datetime64('2020-01-03'),
                datetime.datetime(2020, 1, 4)])  # list-like input
# DatetimeIndex(['2020-01-02', '2020-01-03', '2020-01-04'],
#               dtype='datetime64[ns]', freq=None)
# A numpy array of day-resolution datetime64 values converts to a DatetimeIndex.
a = np.arange('2020-01-01', '2020-01-08', dtype='datetime64[D]')  # ISO dates
# array(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
#        '2020-01-05', '2020-01-06', '2020-01-07'], dtype='datetime64[D]')
pd.to_datetime(a)
# DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
#                '2020-01-05', '2020-01-06', '2020-01-07'],
#               dtype='datetime64[ns]', freq=None)
# A Series of date strings converts to a Series of datetime64 values.
s = pd.Series(['20200501', '20200502', '20200503', '20200504'])  # strings (not ISO dates)
# 0    20200501
# 1    20200502
# 2    20200503
# 3    20200504
# dtype: object
pd.to_datetime(s)  # returns a pd.Series
# 0   2020-05-01
# 1   2020-05-02
# 2   2020-05-03
# 3   2020-05-04
# dtype: datetime64[ns]
# A DataFrame with 'year', 'month' and 'day' columns is assembled row by row
# into one datetime per row.
df = pd.DataFrame({'year': [2015, 2016], 'month': [2, 3], 'day': [4, 5]})
#    year  month  day
# 0  2015      2    4
# 1  2016      3    5
pd.to_datetime(df)  # returns a pd.Series
# 0   2015-02-04
# 1   2016-03-05
# dtype: datetime64[ns]
# Scalar inputs return a single Timestamp.
pd.to_datetime(1234567890)  # a bare integer is interpreted as nanoseconds
# Timestamp('1970-01-01 00:00:01.234567890')
pd.to_datetime('2021-02-15')  # Timestamp('2021-02-15 00:00:00')
pd.to_datetime('20210215')    # Timestamp('2021-02-15 00:00:00')
pd.to_datetime('today')       # Timestamp('2024-05-18 12:08:56.026342')
# pandas.date_range(start=None, end=None, periods=None, freq=None,
#                   tz=None, normalize=False, name=None, inclusive='both',
#                   **kwargs)
# Return a fixed frequency DatetimeIndex.
# start, end : str or datetime-like, optional
# periods : int, optional
# freq : str or DateOffset, default 'D'
#   ['s' second, 'h' hour, 'D' day, 'W' week, 'ME' month end, 'YE' year end]
#   Multiples are allowed: '6h', '3ME'.
# NOTE: the old `closed` argument was removed in pandas 2.0 in favour of
# `inclusive`; the aliases 'S', 'H', 'M', 'Y' are deprecated since pandas 2.2.

pd.date_range(start='20210105', end='20210110')  # closed interval
# DatetimeIndex(['2021-01-05', '2021-01-06', '2021-01-07',
#                '2021-01-08', '2021-01-09', '2021-01-10'],
#               dtype='datetime64[ns]', freq='D')

# A DateOffset object is used for the month-end frequency because the string
# alias depends on the pandas version ('ME' from 2.2 on, 'M' before).
pd.date_range(start='2020-01-31', periods=5, freq=pd.offsets.MonthEnd())
# DatetimeIndex(['2020-01-31', '2020-02-29', '2020-03-31',
#                '2020-04-30', '2020-05-31'],
#               dtype='datetime64[ns]', freq='ME')  # freq='M' before pandas 2.2

pd.date_range("2018-01-01", periods=4, freq="h")
# DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 01:00:00',
#                '2018-01-01 02:00:00', '2018-01-01 03:00:00'],
#               dtype='datetime64[ns]', freq='h')  # freq='H' before pandas 2.2
# A DatetimeIndex used as the *values* of a Series (default integer index).
dates = pd.date_range("20210501", periods=6)  # 6 days
# DatetimeIndex(['2021-05-01', '2021-05-02', '2021-05-03',
#                '2021-05-04', '2021-05-05', '2021-05-06'],
#               dtype='datetime64[ns]', freq='D')
s4 = pd.Series(dates)
# 0   2021-05-01
# 1   2021-05-02
# 2   2021-05-03
# 3   2021-05-04
# 4   2021-05-05
# 5   2021-05-06
# dtype: datetime64[ns]
s4[2]  # Timestamp('2021-05-03 00:00:00')
s4.values
# array(['2021-05-01T00:00:00.000000000', '2021-05-02T00:00:00.000000000',
#        '2021-05-03T00:00:00.000000000', '2021-05-04T00:00:00.000000000',
#        '2021-05-05T00:00:00.000000000', '2021-05-06T00:00:00.000000000'],
#       dtype='datetime64[ns]')
# A DatetimeIndex used as the *index* of a Series.
s5 = pd.Series(data=range(10, 70, 10), index=dates)
# 2021-05-01    10
# 2021-05-02    20
# 2021-05-03    30
# 2021-05-04    40
# 2021-05-05    50
# 2021-05-06    60
# Freq: D, dtype: int64
s4.equals(s5)  # False; equals() tests whether two objects contain the same elements

# NOTE(review): `plt` is presumably matplotlib.pyplot imported elsewhere in the
# file -- confirm before running this cell.
s5.plot()  # x ticks are days
# plt.plot(s5.index, s5.values, 'ks')  # different result
plt.show()

# Removing one entry by its timestamp label; the index then has no regular
# frequency any more (freq=None).
t1 = pd.to_datetime('20210504')  # Timestamp('2021-05-04 00:00:00')
del s5[t1]
s5.index
# DatetimeIndex(['2021-05-01', '2021-05-02', '2021-05-03', '2021-05-05',
#                '2021-05-06'], dtype='datetime64[ns]', freq=None)
Timedeltas are differences in times, expressed in different units, e.g. days, hours, minutes, seconds. They can be both positive and negative. Timedelta is a subclass of datetime.timedelta.
# Ways to construct a pandas.Timedelta.
import datetime  # needed for the datetime.timedelta example below

# from strings
pd.Timedelta('5 days')          # Timedelta('5 days 00:00:00')
pd.Timedelta("1 days 2 hours")  # Timedelta('1 days 02:00:00')

# like datetime.timedelta
pd.Timedelta(days=3, seconds=15)  # Timedelta('3 days 00:00:15')

# from a datetime.timedelta/np.timedelta64
pd.Timedelta(datetime.timedelta(days=1, seconds=1))  # Timedelta('1 days 00:00:01')
pd.Timedelta(np.timedelta64(131, 's'))               # Timedelta('0 days 00:02:11')
# pandas.to_timedelta(arg, unit=None, errors='raise')
# Convert argument to timedelta.
# arg : str, timedelta, list-like or Series
# unit : str, optional, defaults to 'ns' ['W', 'D', 'h', 's']
#        (the uppercase 'S' spelling is deprecated in recent pandas releases)
# errors : {'ignore', 'raise', 'coerce'}, default 'raise'
#          ('ignore' is deprecated since pandas 2.2)

pd.to_timedelta('2 days 3 h 4 min 5 sec')  # from string
# Timedelta('2 days 03:04:05')
pd.to_timedelta(['1 days', '3 h', '4 min', '5 sec'])  # from list
# TimedeltaIndex(['1 days 00:00:00', '0 days 03:00:00',
#                 '0 days 00:04:00', '0 days 00:00:05'],
#                dtype='timedelta64[ns]', freq=None)
# Worked example: reductions on datetime/timedelta Series, datetime + timedelta
# arithmetic, and missing values (NaT).

# A Series of datetimes.
s = pd.Series(pd.date_range("2012-01-01", periods=4, freq="D"))
# 0   2012-01-01
# 1   2012-01-02
# 2   2012-01-03
# 3   2012-01-04
# dtype: datetime64[ns]
s.max()  # Timestamp('2012-01-04 00:00:00')
s.min()  # Timestamp('2012-01-01 00:00:00')

# A Series of timedeltas.
td = pd.Series([pd.Timedelta(days=i) for i in range(4)])
# 0   0 days
# 1   1 days
# 2   2 days
# 3   3 days
# dtype: timedelta64[ns]
td.sum()   # Timedelta('6 days 00:00:00')
td.max()   # Timedelta('3 days 00:00:00')
td.min()   # Timedelta('0 days 00:00:00')
td.mean()  # Timedelta('1 days 12:00:00')

# datetime + timedelta gives a datetime, element by element.
df = pd.DataFrame({"A": s, "B": td})
#            A      B
# 0 2012-01-01 0 days
# 1 2012-01-02 1 days
# 2 2012-01-03 2 days
# 3 2012-01-04 3 days
df["C"] = df["A"] + df["B"]
print(df)
#            A      B          C
# 0 2012-01-01 0 days 2012-01-01
# 1 2012-01-02 1 days 2012-01-03
# 2 2012-01-03 2 days 2012-01-05
# 3 2012-01-04 3 days 2012-01-07

# Using missing values: np.nan assigned into a datetime64/timedelta64 Series
# is shown as NaT.
s[1] = np.nan  # df is not changed!
print(s)
# 0   2012-01-01
# 1          NaT
# 2   2012-01-03
# 3   2012-01-04
# dtype: datetime64[ns]
td[2] = np.nan  # df is not changed!
print(td)
# 0   0 days
# 1   1 days
# 2      NaT
# 3   3 days
# dtype: timedelta64[ns]