https://pandas.pydata.org/docs/user_guide/dsintro.html
'DataFrame' is a 2-dimensional labeled data structure with columns of potentially different types (like a spreadsheet). It is generally the most commonly used pandas object.
# pd.DataFrame(data=None, index=None, columns=None, dtype=None, copy=False)
#   data    : ndarray, iterable, dict, or DataFrame
#   index   : index or array-like
#   columns : index or array-like
#   dtype   : numpy.dtype or None
#   copy    : boolean, default False; copy input data
# Constructing DataFrame from a dictionary.
# Each key becomes a column label and each value supplies that column's data:
# D = {'col1': series1, 'col2': series2}
D = {'col1': [1, 2], 'col2': [3, 4]}
df1 = pd.DataFrame(D)                   # inferred dtype is int64
# df1 = pd.DataFrame(D, dtype=np.int8)  # dtype is set
#    col1  col2
# 0     1     3
# 1     2     4

df1.dtypes
# col1    int64
# col2    int64
# dtype: object

# Row labels.
df1.index                       # RangeIndex(start=0, stop=2, step=1)
# df1.index = ['row1', 'row2']  # changing index
df1.index.to_numpy()            # array([0, 1])

# Column labels.
df1.columns                     # Index(['col1', 'col2'], dtype='object')
# df1.columns = ['A', 'B']      # changing columns
df1.columns.to_numpy()          # array(['col1', 'col2'], dtype=object)

# Underlying data as a NumPy array. `.to_numpy()` is the accessor pandas
# recommends; the older `.values` attribute returns the same array here.
df1.to_numpy()                  # array([[1, 3], [2, 4]])

df1.shape                       # (2, 2), (nrows, ncols)
df1.size                        # 4, nrows * ncols
# Constructing DataFrame from numpy ndarray.
# Each row of the 2-D array becomes one DataFrame row; `columns` supplies the
# column labels (list('ABC') -> ['A', 'B', 'C']).
df2 = pd.DataFrame(
    np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
    columns=list('ABC'),
)
#    A  B  C
# 0  1  2  3
# 1  4  5  6
# 2  7  8  9
# Constructing DataFrame from a list of dictionaries.
# Every row is a dictionary with column names as keys.
L = [{'x': 10, 'y': 1.1}, {'x': 20, 'y': 2.2}, {'x': 30, 'y': 3.3}]
df5 = pd.DataFrame(L, columns=['x', 'y'])   # 'columns' for proper column ordering
#     x    y
# 0  10  1.1
# 1  20  2.2
# 2  30  3.3

df5.dtypes
# x      int64
# y    float64
# dtype: object

# The int64 and float64 columns are combined into a single float array.
# `.to_numpy()` is the accessor pandas recommends; the older `.values`
# attribute returns the same array here.
df5.to_numpy()
# array([[10. ,  1.1],
#        [20. ,  2.2],
#        [30. ,  3.3]])
# Common inspection, copy and re-indexing calls.
# NOTE: `df`, `n` and `column_name` are placeholders that are not defined in
# this section; substitute an existing DataFrame, a row count and a column label.
print(df.head())     # viewing the first 5 rows
print(df.head(n))    # viewing the first n rows
print(df.tail())     # viewing the last 5 rows
print(df.tail(n))    # viewing the last n rows

# info() writes its report to stdout itself and returns None, so wrapping it
# in print() would only add a stray "None" line.
df.info()            # print info about the data

df_copy = df.copy()
df_index = df.set_index(column_name)   # new DataFrame
df_sort = df.sort_index()              # new DataFrame