Pandas - Series, creation

https://pandas.pydata.org/docs/user_guide/dsintro.html

INTRODUCTION

'Series' is a one-dimensional labeled array capable of holding any data type (integers, strings, floating point numbers, Python objects, etc.). The axis labels are collectively referred to as the 'index'.

CREATION

# Imports assumed by every snippet in these notes.
import numpy as np   # used by the NumPy-based snippets further down
import pandas as pd

# pd.Series(data=None, index=None, dtype=None, name=None, copy=False)
# data : array-like, dict, or scalar value
# index : labels; values must be hashable and have the same length as 'data'
#         (when omitted, a default integer index 0, 1, 2, ... is used)
# dtype : numpy.dtype, Python type such as complex, or None (None: inferred from 'data')
# name : string, optional
# copy : boolean, default False; copy input data

s1 = pd.Series(24)   # scalar value -> one-element Series
# 0    24
# dtype: int64

s2 = pd.Series(range(0, 7, 2), name='Numbers')   # default integer index
# 0    0
# 1    2
# 2    4
# 3    6
# Name: Numbers, dtype: int64

s2.index   # RangeIndex(start=0, stop=4, step=1)
# s2.index = list("abcd")
# Changing index to Index(['a', 'b', 'c', 'd'], dtype='object')

s2.values   # array([0, 2, 4, 6])
s2.dtype   # dtype('int64')
s2.name   # 'Numbers'
# s2.name = 'Integers'   # changing 'name'

list(s2)   # [0, 2, 4, 6] from data, not from index

# np.linspace is used instead of np.arange(0, 1.1, 0.2): with a float step the
# number of elements produced by arange depends on rounding, whereas linspace
# takes the number of points explicitly and always includes the end point.
s3 = pd.Series(np.linspace(0, 1, 6))   # from a numpy array
# 0    0.0
# 1    0.2
# 2    0.4
# 3    0.6
# 4    0.8
# 5    1.0
# dtype: float64

s3.index   # RangeIndex(start=0, stop=6, step=1)

del s3[3]   # removing a single item (by label, in place) from the series (slow?)
# 0    0.0
# 1    0.2
# 2    0.4
# 4    0.8   # unchanged index
# 5    1.0   # unchanged index
# dtype: float64

s3.index   # Index([0, 1, 2, 4, 5], dtype='int64')
# (pandas < 2.0 shows this as Int64Index([0, 1, 2, 4, 5], dtype='int64'))

# Indexing
# s3[3]   # KeyError, 3 is interpreted as a label
# s3.loc[3]   # KeyError, 3 is interpreted as a label
# s3.iloc[3]   # 0.8, using a position along the index

# A str passed as data is taken as one scalar value, not as a sequence of characters.
s4 = pd.Series(data="word")
# 0    word
# dtype: object

# Passing a list of one-character strings gives one element per character.
s5 = pd.Series(data=["w", "o", "r", "d"])
# 0    w
# 1    o
# 2    r
# 3    d
# dtype: object

# Missing values: np.nan (or None) marks the gap, dtype sets the element type.
s6 = pd.Series(data=[1, np.nan, 5], dtype=complex)
# 0    (1+0j)
# 1       NaN
# 2    (5+0j)
# dtype: complex128

# From a dict: the keys become the index labels, the values the data (dtype="int64").
s1 = pd.Series({"a": 10, "b": 20, "c": 30})

s1.index   # Index(['a', 'b', 'c'], dtype='object')
s1.values   # array([10, 20, 30])

# The same series built from separate data and index arguments (dtype="int64").
s2 = pd.Series(index=["a", "b", "c"], data=[10, 20, 30])
assert s1.equals(s2)   # s1 and s2 are the same
# Note that s1 == s2 gives a new bool series (elementwise comparison)
# a    True
# b    True
# c    True
# dtype: bool

print(s1["a"])   # 10, access by label
print(s1.a)   # 10, attribute access works only if labels are proper Python identifiers
print(s1 + s2)   # elementwise addition, the result is a new series
# a    20
# b    40
# c    60
# dtype: int64

s3 = pd.Series([1, 2, 4], index=["a", "b", "d"])

# Labels are aligned before adding; "c" and "d" have no partner, so they give NaN.
print(s2 + s3)   # the same as s2.add(s3)
# a    11.0
# b    22.0
# c     NaN
# d     NaN
# dtype: float64   # not int64!

# fill_value=0 stands in for the label missing on either side.
print(s2.add(s3, fill_value=0))
# a    11.0
# b    22.0
# c    30.0
# d     4.0
# dtype: float64   # not int64!

# From a scalar value: it is repeated to match the length of index.
s4 = pd.Series(data=5.0, index=["a", "b", "c", "d"])
# a    5.0
# b    5.0
# c    5.0
# d    5.0
# dtype: float64

FUNCTIONS

# Sample data: 20 random floats; the numbers shown below come from one
# particular run and will differ every time.
s1 = pd.Series(data=np.random.randn(20))

s1.describe()   # summary statistics
# count    20.000000
# mean      0.163855
# std       0.908906
# min      -1.635898
# 25%      -0.219260
# 50%       0.201758
# 75%       0.741498
# max       2.139399
# dtype: float64

s1.head(n=5)   # the first n rows; s1.head() uses n=5
# 0    2.139399
# 1    0.731533
# 2    0.501570
# 3    0.094640
# 4    0.771390
# dtype: float64

s1.tail(n=5)   # the last n rows; s1.tail() uses n=5
# 15    0.215668
# 16   -1.451227
# 17    0.207832
# 18   -0.000653
# 19   -0.124284
# dtype: float64

# Statistics
s1.median(), s1.mean(), s1.std()
# (0.20175811256676268, 0.16385549628466706, 0.9089058621043952)

s1.min(), s1.max()
# (-1.6358983302701848, 2.1393994684147994)

# Quartiles; they match the 25%, 50% and 75% rows of describe().
tuple(s1.quantile(q=q) for q in (0.25, 0.5, 0.75))
# (-0.21926045832145363, 0.20175811256676268, 0.7414975321407481)

s1_copy = s1.copy(deep=True)   # deep=True is the default

s1_vc = s1.value_counts()   # new Series