# https://pandas.pydata.org/docs/user_guide/dsintro.html
# Series indexing walkthrough.
# Bare expressions below are meant to be evaluated interactively (REPL or
# notebook); run as a script they compute a value and discard it.
# The outputs shown in comments are samples: the data is random.
s1 = pd.Series(np.random.randn(4), index=list("abcd"))
# a   -0.588964   # standard normal distribution
# b    0.474307
# c   -1.149516
# d    1.096136
# dtype: float64

# Indexing
# s1.loc[] is primarily label based.
# s1.iloc[] is primarily integer position based.
# s1[] combines both styles (possible problems when index is int).

s1.loc['a':'b']   # using labels, both ends included
s1.iloc[0:2]      # numpy style
s1[0:2]           # slicing will also slice the index, numpy style
s1['a':'b']       # using labels, both ends included
# s1['a':'b'] = 1.0   # update is possible
# a   -0.588964
# b    0.474307
# dtype: float64

s1['a']   # -0.5889642925100458, Series is dict-like
# s1['a'] = 0.0   # update is possible
assert s1.a == s1['a']   # attribute access and key access give the same item
'c' in s1   # True

# A missing label raises KeyError; catch it so the walkthrough keeps running.
try:
    s1['k']
except KeyError as err:
    print(f"KeyError: {err}")   # KeyError: 'k'

s1 > 0   # boolean series, numpy style
# s1.apply(lambda item: item > 0)
# a    False
# b     True
# c    False
# d     True
# dtype: bool

s1[s1 > 0]   # filtering using boolean series
# s1.loc[s1 > 0]
# b    0.474307
# d    1.096136
# dtype: float64

# Selecting by an array of positions or labels.
# s1[[3, 1, 0]] used to fall back to positions on a non-integer index; that
# fallback is deprecated in pandas 2.x, so positions go through .iloc only.
s1.iloc[[3, 1, 0]]         # numpy style (integer positions)
s1.loc[['d', 'b', 'a']]    # using labels
# d    1.096136
# b    0.474307
# a   -0.588964
# dtype: float64

s1 * 2   # vectorized operations, numpy style
# s1.apply(lambda item: item * 2)
# a   -1.177929
# b    0.948614
# c   -2.299031
# d    2.192273
# dtype: float64

np.exp(s1)   # numpy functions
# a    0.554902
# b    1.606901
# c    0.316790
# d    2.992581
# dtype: float64
# Selecting and dropping rows of a Series.
# Each statement below rebinds s3 to a new Series object; s2 itself is
# never modified (drop() is called without inplace=True).
s2 = pd.Series(range(0, 9, 2), dtype=float, index=list("abcde"))
# a    0.0
# b    2.0
# c    4.0
# d    6.0
# e    8.0
# dtype: float64

s3 = s2['b':'c']   # slicing, new Series is created
# b    2.0
# c    4.0
# dtype: float64
s3 = s2.loc['b':'c']   # the same
s3 = s2.iloc[1:3]      # the same

s3 = s2.drop(labels=['b', 'd'])   # new Series is created
# a    0.0
# c    4.0
# e    8.0
# dtype: float64

# Positions can be translated to labels through s2.index before dropping.
s3 = s2.drop(s2.index[3])        # remove 'd' row
s3 = s2.drop(s2.index[[1, 3]])   # remove 'b', 'd' rows
s3 = s2.drop(s2.index[1:4])      # remove 'b', 'c', 'd' rows
# https://pandas.pydata.org/docs/reference/api/pandas.concat.html
# Series.append() was deprecated in pandas 1.4 and removed in pandas 2.0;
# pd.concat() is the replacement and also works on older versions.
#
# pd.concat(objs, ignore_index=False, verify_integrity=False, ...)
# Concatenate two or more Series.
# objs : list/tuple of Series
# ignore_index : bool, default False
#     If True, the resulting axis will be labeled 0, 1, ...
# verify_integrity : bool, default False
#     If True, raise ValueError on creating index with duplicates.
#
# Note: do not append items to a series one by one,
# better extend with an ordered collection.
# Accumulate data in an external list or a dictionary,
# make a second series, then concatenate it with the first series.
s1 = pd.Series([1, 2, 3])
s2 = pd.Series([4, 5, 6])

s3 = pd.concat([s1, s2])
# 0    1   index with duplicates
# 1    2
# 2    3
# 0    4
# 1    5
# 2    6
# dtype: int64
s3.index   # Index([0, 1, 2, 0, 1, 2], dtype='int64')  (Int64Index before pandas 2.0)
# Note that s3[0] gives a new series with duplicate labels (problems!)
# 0    1
# 0    4
# dtype: int64
# https://pandas.pydata.org/docs/user_guide/duplicates.html

s4 = pd.concat([s1, s2], ignore_index=True)
# 0    1
# 1    2
# 2    3
# 3    4
# 4    5
# 5    6
# dtype: int64
s4.index   # RangeIndex(start=0, stop=6, step=1)

# verify_integrity=True rejects the overlapping labels 0, 1, 2; catch the
# error so the demonstration does not abort the rest of the script.
try:
    pd.concat([s1, s2], verify_integrity=True)
except ValueError as err:
    # ValueError: Indexes have overlapping values: Index([0, 1, 2], dtype='int64')
    print(f"ValueError: {err}")
# Plotting a Series with the pandas wrapper around matplotlib.
# NOTE(review): relies on `plt` (presumably `import matplotlib.pyplot as plt`)
# being imported elsewhere in the file - not visible in this section; confirm.
s1 = pd.Series(np.random.randn(10))   # s1.index vs s1.values
# s1 = pd.Series(np.random.randn(10), index=list("qwertyuiop"))   # no xticks
s1.plot()   # return Axes
# plt.scatter(s1.index, s1.values)   # simple approach
# plt.plot(s1.index, s1.values)      # simple approach

# Selected options of Series.plot()
# kind : 'line', 'bar', 'barh', 'hist', 'box', 'area', 'pie'
# ax : matplotlib axes object; if not passed, uses gca()
# figsize : a tuple (width, height) in inches
# use_index : boolean, default True; use index as ticks for x axis
# title : string, title to use for the plot
# grid : boolean, default None
# legend : False/True/'reverse'
# xlim : 2-tuple/list
# ylim : 2-tuple/list
# rot : int, default None; rotation for ticks
# fontsize : int, default None
plt.show()