Pandas - DataFrame, creation

https://pandas.pydata.org/docs/user_guide/dsintro.html

INTRODUCTION

'DataFrame' is a 2-dimensional labeled data structure with columns of potentially different types (like a spreadsheet). It is generally the most commonly used pandas object.


# pd.DataFrame(data=None, index=None, columns=None, dtype=None, copy=False)
# data : ndarray, iterable, dict, or DataFrame
# index : index or array-like
# columns : index or array-like
# dtype : numpy.dtype or None
# copy : boolean, default False; copy input data

# Building a DataFrame out of a dictionary.
# Each key becomes a column label and each value supplies that column's data:
# D = {'col1': series1, 'col2': series2}

D = dict(col1=[1, 2], col2=[3, 4])

df1 = pd.DataFrame(data=D)   # no dtype given, so int64 is inferred
#df1 = pd.DataFrame(data=D, dtype=np.int8)   # alternative: request the dtype explicitly
#    col1  col2
# 0     1     3
# 1     2     4

# Per-column dtypes.
df1.dtypes
# col1    int64
# col2    int64
# dtype: object

# Row labels.
df1.index   # RangeIndex(start=0, stop=2, step=1)
#df1.index = ['row1', 'row2']   # assigning a list replaces the row labels
df1.index.values   # array([0, 1])

# Column labels.
df1.columns   # Index(['col1', 'col2'], dtype='object')
#df1.columns = ['A', 'B']   # assigning a list replaces the column labels
df1.columns.values   # array(['col1', 'col2'], dtype=object)

# Underlying data as a 2-D array, one inner list per row.
df1.values   # array([[1, 3], [2, 4]])

# Dimensions as (nrows, ncols).
df1.shape   # (2, 2)

# Total number of cells, nrows * ncols.
df1.size   # 4

# Building a DataFrame out of a 2-D numpy ndarray.
# Every inner list is one row; 'columns' supplies the column labels.

df2 = pd.DataFrame(
    data=np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
    columns=['A', 'B', 'C'],
)
#    A  B  C
# 0  1  2  3
# 1  4  5  6
# 2  7  8  9

# Building a DataFrame out of a list of dictionaries.
# Each dictionary is one row, keyed by column name.

L = [
    {'x': 10, 'y': 1.1},
    {'x': 20, 'y': 2.2},
    {'x': 30, 'y': 3.3},
]
df5 = pd.DataFrame(data=L, columns=['x', 'y'])   # 'columns' pins the column order
#     x    y
# 0  10  1.1
# 1  20  2.2
# 2  30  3.3

# Each column keeps its own dtype.
df5.dtypes
# x      int64
# y    float64
# dtype: object

# As one array the integers of 'x' come back as floats (10. instead of 10).
df5.values
# array([[10. ,  1.1],
#        [20. ,  2.2],
#        [30. ,  3.3]])

# Generic inspection snippets.
# NOTE(review): 'df', 'n' and 'column_name' are not defined in this section;
# presumably placeholders for any DataFrame, a row count and a column label.

print(df.head())   # viewing the first 5 rows
print(df.head(n))   # viewing the first n rows

print(df.tail())   # viewing the last 5 rows
print(df.tail(n))   # viewing the last n rows

# info() writes its summary to stdout itself and returns None, so wrapping it
# in print() would add a stray "None" line after the summary.
df.info()   # print info about the data

df_copy = df.copy()   # new DataFrame (deep copy by default)

df_index = df.set_index(column_name)   # new DataFrame

df_sort = df.sort_index()   # new DataFrame