# Reference: https://pandas.pydata.org/docs/user_guide/dsintro.html
# A 'Series' is a one-dimensional labeled array capable of holding any data type (integers, strings, floating point numbers, Python objects, etc.). The axis labels are collectively referred to as the 'index'.
# Constructor signature as recorded in these notes (defaults may differ
# between pandas versions):
#   pd.Series(data=None, index=None, dtype=None, name=None, copy=False)
#
#   data  : array-like, dict, or scalar value
#   index : values must be hashable and have the same length as 'data'
#   dtype : numpy.dtype or None
#   name  : string, optional
#   copy  : boolean, default False; copy input data

# Simplest case: a single scalar becomes a one-element Series.
s1 = pd.Series(24)  # scalar value
# 0    24
# dtype: int64
# Series built from a range; with no explicit index the labels default to
# consecutive integers starting at 0.
s2 = pd.Series(data=range(0, 7, 2), name="Numbers")
# 0    0
# 1    2
# 2    4
# 3    6
# Name: Numbers, dtype: int64

# The individual pieces of a Series are exposed as attributes.
s2.index   # RangeIndex(start=0, stop=4, step=1)
# Relabelling is done by assigning a new index, e.g.
#   s2.index = list("abcd")
# which changes the index to Index(['a', 'b', 'c', 'd'], dtype='object').
s2.values  # array([0, 2, 4, 6])
s2.dtype   # dtype('int64')
s2.name    # 'Numbers'
# The name can be reassigned the same way:  s2.name = 'Integers'

# Converting to a list yields the data values, not the index labels.
list(s2)   # [0, 2, 4, 6]
s3 = pd.Series(np.arange(0, 1.1, 0.2))  # from a numpy array
# 0    0.0
# 1    0.2
# 2    0.4
# 3    0.6
# 4    0.8
# 5    1.0
# dtype: float64
s3.index  # RangeIndex(start=0, stop=6, step=1)

# Deleting by label removes that entry only; the remaining labels are NOT
# renumbered, so the index is no longer a contiguous range afterwards.
del s3[3]  # removing a single item from the series (slow?)
# 0    0.0
# 1    0.2
# 2    0.4
# 4    0.8   # unchanged index
# 5    1.0   # unchanged index
# dtype: float64
s3.index  # Int64Index([0, 1, 2, 4, 5], dtype='int64')
#   (pandas >= 2.0 prints this as Index([0, 1, 2, 4, 5], dtype='int64'))

# Indexing: with label 3 gone, label-based and position-based lookups differ.
# s3[3]       # KeyError, 3 is interpreted as a label
# s3.loc[3]   # KeyError, 3 is interpreted as a label
# s3.iloc[3]  # 0.8, using a position along the index
# A bare string is treated as ONE scalar value, not as a sequence of
# characters, so the result has a single element.
s4 = pd.Series(data="word")
# 0    word
# dtype: object

# To get one element per character, pass a list of strings instead.
s5 = pd.Series(data=list("word"))
# 0    w
# 1    o
# 2    r
# 3    d
# dtype: object
# Missing values.
# An explicit dtype is honoured; the missing entry is stored as NaN.
s6 = pd.Series([1, np.nan, 5], dtype=complex)  # using dtype and np.nan (or None)
# 0    (1+0j)
# 1       NaN
# 2    (5+0j)
# dtype: complex128
# (the exact text shown for the NaN row may vary between pandas versions)
# Two equivalent constructions: from a dict (keys become the labels) and
# from separate data / index sequences.
s1 = pd.Series({"a": 10, "b": 20, "c": 30})  # from dict, dtype="int64"
s1.index   # Index(['a', 'b', 'c'], dtype='object')
s1.values  # array([10, 20, 30])
s2 = pd.Series([10, 20, 30], index=["a", "b", "c"])  # dtype="int64"

# s1 and s2 hold the same labels and values; equals() reports that as a
# single bool.
assert s1.equals(s2)
# By contrast, s1 == s2 is an elementwise comparison and returns a new bool
# Series:
# a    True
# b    True
# c    True
# dtype: bool

# Access by label: item syntax always works, attribute syntax only when the
# label is a proper Python identifier.
print(s1["a"])  # 10
print(s1.a)     # 10

# Elementwise addition produces a new Series.
print(s1 + s2)
# a    20
# b    40
# c    60
# dtype: int64

# Arithmetic pairs values by LABEL. Here "c" exists only in s2 and "d" only
# in s3, so neither finds a partner.
s3 = pd.Series([1, 2, 4], index=["a", "b", "d"])
print(s2 + s3)  # same as s2.add(s3)
# a    11.0
# b    22.0
# c     NaN
# d     NaN
# dtype: float64   # not int64!

# fill_value supplies a stand-in for the side that lacks a label, so the
# unmatched entries are kept instead of becoming NaN.
print(s2.add(s3, fill_value=0))
# a    11.0
# b    22.0
# c    30.0
# d     4.0
# dtype: float64   # not int64!
# Series from a scalar plus an explicit index: the value will be repeated to
# match the length of the index.
s4 = pd.Series(data=5.0, index=["a", "b", "c", "d"])
# a    5.0
# b    5.0
# c    5.0
# d    5.0
# dtype: float64
# NOTE: the data is random and no seed is set in this snippet, so the numbers
# in the output comments below are one sample run and will differ each time.
s1 = pd.Series(np.random.randn(20))

# Summary statistics in a single call.
s1.describe()
# count    20.000000
# mean      0.163855
# std       0.908906
# min      -1.635898
# 25%      -0.219260
# 50%       0.201758
# 75%       0.741498
# max       2.139399
# dtype: float64

s1.head()  # s1.head(n=5), return the first n rows
# 0    2.139399
# 1    0.731533
# 2    0.501570
# 3    0.094640
# 4    0.771390
# dtype: float64

s1.tail()  # s1.tail(n=5), return the last n rows.
# 15    0.215668
# 16   -1.451227
# 17    0.207832
# 18   -0.000653
# 19   -0.124284
# dtype: float64

# Statistics
s1.median(), s1.mean(), s1.std()
# (0.20175811256676268, 0.16385549628466706, 0.9089058621043952)
s1.min(), s1.max()
# (-1.6358983302701848, 2.1393994684147994)
s1.quantile(q=0.25), s1.quantile(q=0.5), s1.quantile(q=0.75)
# (-0.21926045832145363, 0.20175811256676268, 0.7414975321407481)

s1_copy = s1.copy()
s1_vc = s1.value_counts()  # new Series