# https://pandas.pydata.org/docs/user_guide/dsintro.html
# 'DataFrame' is a 2-dimensional labeled data structure with columns of potentially different types (like a spreadsheet). It is generally the most commonly used pandas object.
# pd.DataFrame(data=None, index=None, columns=None, dtype=None, copy=False)
# data : ndarray, iterable, dict, or DataFrame
# index : index or array-like
# columns : index or array-like
# dtype : numpy.dtype or None
# copy : boolean, default False; copy input data
# Build a DataFrame from a dictionary: each key becomes a column label and
# each value supplies that column's data.
# The values may also be Series: D = {'col1': series1, 'col2': series2}
D = {
    'col1': [1, 2],
    'col2': [3, 4],
}
df1 = pd.DataFrame(data=D)  # no dtype given, so it is inferred as int64
# df1 = pd.DataFrame(data=D, dtype=np.int8)  # alternative: set the dtype explicitly
#    col1  col2
# 0     1     3
# 1     2     4

# Per-column dtypes.
df1.dtypes
# col1    int64
# col2    int64
# dtype: object

# Row labels.
df1.index  # RangeIndex(start=0, stop=2, step=1)
# df1.index = ['row1', 'row2']  # assigning a list replaces the index
df1.index.values  # array([0, 1])

# Column labels.
df1.columns  # Index(['col1', 'col2'], dtype='object')
# df1.columns = ['A', 'B']  # assigning a list replaces the column labels
df1.columns.values  # array(['col1', 'col2'], dtype=object)

# Underlying data and its dimensions.
df1.values  # array([[1, 3], [2, 4]])
df1.shape  # (2, 2), i.e. (nrows, ncols)
df1.size  # 4, i.e. nrows * ncols
# Build a DataFrame from a 2-D numpy ndarray; the column labels are passed
# explicitly, one per array column.
df2 = pd.DataFrame(
    np.array([
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
    ]),
    columns=['A', 'B', 'C'],
)
#    A  B  C
# 0  1  2  3
# 1  4  5  6
# 2  7  8  9
# Build a DataFrame from a list of dictionaries.
# Each dictionary is one row; its keys are the column names.
L = [
    {'x': 10, 'y': 1.1},
    {'x': 20, 'y': 2.2},
    {'x': 30, 'y': 3.3},
]
# Passing 'columns' pins the column order.
df5 = pd.DataFrame(data=L, columns=['x', 'y'])
#     x    y
# 0  10  1.1
# 1  20  2.2
# 2  30  3.3

# Each column keeps its own dtype.
df5.dtypes
# x      int64
# y    float64
# dtype: object

# The combined array shows the integers as floats.
df5.values
# array([[10. ,  1.1],
#        [20. ,  2.2],
#        [30. ,  3.3]])
# Quick reference for inspecting a DataFrame.
# NOTE(review): the text after the first statement looks like several
# statements that were collapsed into one trailing comment. They are listed
# one per line below and left commented out, because `df`, `n` and
# `column_name` are not defined in the visible source (presumably
# placeholders -- confirm before enabling any of them).
print(df.head()) # viewing the first 5 rows
# print(df.head(n)) # viewing the first n rows
# print(df.tail()) # viewing the last 5 rows
# print(df.tail(n)) # viewing the last n rows
# print(df.info()) # print info about the data
# df_copy = df.copy()
# df_index = df.set_index(column_name) # new DataFrame
# df_sort = df.sort_index() # new DataFrame