# https://pandas.pydata.org/docs/user_guide/dsintro.html
# 'Series' is a one-dimensional labeled array capable of holding any data type
# (integers, strings, floating point numbers, Python objects, etc.).
# The axis labels are collectively referred to as the 'index'.
# pd.Series(data=None, index=None, dtype=None, name=None, copy=False)
#   data  : array-like, dict, or scalar value
#   index : values must be hashable and have the same length as 'data'
#   dtype : numpy.dtype or None
#   name  : string, optional
#   copy  : boolean, default False; copy input data

# A single scalar gives a one-element Series with the default integer index.
s1 = pd.Series(24)  # scalar value
# 0    24
# dtype: int64
# Series built from a range, carrying a name and the default integer index.
s2 = pd.Series(range(0, 7, 2), name='Numbers')  # default integer index
# 0    0
# 1    2
# 2    4
# 3    6
# Name: Numbers, dtype: int64

# Inspecting the parts of a Series (bare expressions: results are only shown
# in an interactive session).
s2.index   # RangeIndex(start=0, stop=4, step=1)
# s2.index = list("abcd")  # Changing index to Index(['a', 'b', 'c', 'd'], dtype='object')
s2.values  # array([0, 2, 4, 6])
s2.dtype   # dtype('int64')
s2.name    # 'Numbers'
# s2.name = 'Integers'  # changing 'name'
list(s2)   # [0, 2, 4, 6] from data, not from index
# Series built from a numpy array of floats.
s3 = pd.Series(np.arange(0, 1.1, 0.2))  # from a numpy array
# 0    0.0
# 1    0.2
# 2    0.4
# 3    0.6
# 4    0.8
# 5    1.0
# dtype: float64
s3.index  # RangeIndex(start=0, stop=6, step=1)

# Deleting by label keeps the remaining labels as they were (no renumbering).
del s3[3]  # removing a single item from the series (slow?)
# 0    0.0
# 1    0.2
# 2    0.4
# 4    0.8   # unchanged index
# 5    1.0   # unchanged index
# dtype: float64
s3.index
# pandas < 2.0 : Int64Index([0, 1, 2, 4, 5], dtype='int64')
# pandas >= 2.0: Index([0, 1, 2, 4, 5], dtype='int64')

# Indexing
# s3[3]       # KeyError, 3 is interpreted as a label
# s3.loc[3]   # KeyError, 3 is interpreted as a label
# s3.iloc[3]  # 0.8, using a position along the index
# A bare string is treated as one scalar value, not as a sequence of characters.
s4 = pd.Series("word")  # a string is not interpreted as a sequence
# 0    word
# dtype: object

# Splitting the string into a list first gives one element per character.
s5 = pd.Series(list("word"))  # from a list of strings
# 0    w
# 1    o
# 2    r
# 3    d
# dtype: object
# Missing values.
# np.nan (or None) marks a missing entry; dtype forces the element type.
s6 = pd.Series([1, np.nan, 5], dtype=complex)  # using dtype and np.nan (or None)
# 0    (1+0j)
# 1       NaN
# 2    (5+0j)
# dtype: complex128
# Series from a dict: the keys become the index labels.
s1 = pd.Series({'a': 10, 'b': 20, 'c': 30})  # from dict, dtype="int64"
s1.index   # Index(['a', 'b', 'c'], dtype='object')
s1.values  # array([10, 20, 30])

# The same Series built from explicit data and index.
s2 = pd.Series(data=[10, 20, 30], index=list("abc"))  # dtype="int64"

# s1 and s2 are the same
assert s1.equals(s2)
# Note that s1 == s2 gives a new bool series (elementwise comparison)
# a    True
# b    True
# c    True
# dtype: bool

# Label-based access.
print(s1['a'])  # 10
print(s1.a)     # 10, only if labels are proper Python identifiers

# Arithmetic aligns on labels, not on positions.
print(s1 + s2)  # creating new series (elementwise addition)
# a    20
# b    40
# c    60
# dtype: int64

s3 = pd.Series(data=[1, 2, 4], index=list("abd"))
print(s2 + s3)  # same as s2.add(s3); labels "c" and "d" do not match
# a    11.0
# b    22.0
# c     NaN
# d     NaN
# dtype: float64   # not int64!

# fill_value substitutes for a label that is missing on one side.
print(s2.add(s3, fill_value=0))
# a    11.0
# b    22.0
# c    30.0
# d     4.0
# dtype: float64   # not int64!
# Scalar broadcast: a single value is repeated once for every index label.
s4 = pd.Series(data=5.0, index=["a", "b", "c", "d"])  # from scalar value
# The value will be repeated to match the length of index.
# a    5.0
# b    5.0
# c    5.0
# d    5.0
# dtype: float64
# 20 samples from np.random.randn; the generator is not seeded, so the numbers
# shown in the comments below are from one example run and change every time.
s1 = pd.Series(np.random.randn(20))

s1.describe()
# count    20.000000
# mean      0.163855
# std       0.908906
# min      -1.635898
# 25%      -0.219260
# 50%       0.201758
# 75%       0.741498
# max       2.139399
# dtype: float64

s1.head()  # s1.head(n=5), return the first n rows
# 0    2.139399
# 1    0.731533
# 2    0.501570
# 3    0.094640
# 4    0.771390
# dtype: float64

s1.tail()  # s1.tail(n=5), return the last n rows.
# 15    0.215668
# 16   -1.451227
# 17    0.207832
# 18   -0.000653
# 19   -0.124284
# dtype: float64

# Statistics
s1.median(), s1.mean(), s1.std()
# (0.20175811256676268, 0.16385549628466706, 0.9089058621043952)
s1.min(), s1.max()
# (-1.6358983302701848, 2.1393994684147994)
s1.quantile(q=0.25), s1.quantile(q=0.5), s1.quantile(q=0.75)
# (-0.21926045832145363, 0.20175811256676268, 0.7414975321407481)

s1_copy = s1.copy()
s1_vc = s1.value_counts()  # new Series