Pandas - files

https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html

https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_csv.html

https://pandas.pydata.org/docs/reference/api/pandas.read_json.html

https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_json.html

https://pandas.pydata.org/docs/reference/api/pandas.Series.to_json.html

https://pandas.pydata.org/docs/reference/api/pandas.read_excel.html

https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_excel.html

INTRODUCTION

Pandas integrates with many file formats and data sources out of the box (CSV, Excel, SQL, JSON, Parquet, ...). Data is imported from each of these sources by a function with the prefix 'read_*'. Similarly, the 'to_*' methods are used to store data.

USING CSV FILES


# pandas.read_csv(filepath_or_buffer, sep=',', delimiter=None, header='infer',
#     names=None, index_col=None, usecols=None, squeeze=False, prefix=None,
#     mangle_dupe_cols=True, dtype=None, engine=None, converters=None,
#     true_values=None, false_values=None, skipinitialspace=False, skiprows=None,
#     nrows=None, na_values=None, keep_default_na=True, na_filter=True,
#     verbose=False, skip_blank_lines=True, parse_dates=False,
#     infer_datetime_format=False, keep_date_col=False, date_parser=None,
#     dayfirst=False, iterator=False, chunksize=None, compression='infer',
#     thousands=None, decimal=b'.', lineterminator=None, quotechar='"',
#     quoting=0, escapechar=None, comment=None, encoding=None, dialect=None,
#     tupleize_cols=None, error_bad_lines=True, warn_bad_lines=True, skipfooter=0,
#     doublequote=True, delim_whitespace=False, low_memory=True, memory_map=False,
#     float_precision=None)
#
# NOTE: the signature above comes from an older pandas release. pandas 2.x no
# longer accepts squeeze, prefix, mangle_dupe_cols, tupleize_cols,
# error_bad_lines or warn_bad_lines (bad-line handling is now 'on_bad_lines').
# Check the linked reference page for the signature of the installed version.
#
# Read CSV and make DataFrame.
# header=None : the file has no header row, so columns are numbered 0, 1, 2...
#
# File 'data1.csv'
# 11,a,2011
# 12,b,2012
# 13,c,2013

df = pd.read_csv('data1.csv', sep=",", encoding="utf-8", header=None)
#     0  1     2
# 0  11  a  2011
# 1  12  b  2012
# 2  13  c  2013

# Outside an interactive session a bare expression is evaluated and then
# discarded, so the results are printed explicitly.
print(df.describe())   # summary statistics of the numeric columns
print(df.head())   # display the first few records

# pandas.DataFrame.to_csv(self, path_or_buf=None, sep=',', na_rep='',
#     float_format=None, columns=None, header=True, index=True, index_label=None,
#     mode='w', encoding=None, compression=None, quoting=None, quotechar='"',
#     line_terminator='\n', chunksize=None, tupleize_cols=None, date_format=None,
#     doublequote=True, escapechar=None, decimal='.')
#
# Serialize a DataFrame as CSV text.
# encoding : e.g. "utf-8", "latin1", "iso-8859-1", "cp1252"
# index : bool, write the row labels as the first column
# header : bool, write the column names as the first line

columns_data = {
    'pos': [11, 12, 13],
    'char': ['a', 'b', 'c'],
    'year': [2011, 2012, 2013],
}
df = pd.DataFrame(columns_data)
#    pos char  year
# 0   11    a  2011
# 1   12    b  2012
# 2   13    c  2013

csv_path = 'data2.csv'

# With the defaults both the header line and the index column are written.
df.to_csv(csv_path, encoding="utf-8")
# ,pos,char,year
# 0,11,a,2011
# 1,12,b,2012
# 2,13,c,2013

# Data only: the same file is overwritten (mode='w') without header or index.
df.to_csv(csv_path, encoding="utf-8", index=False, header=False)
# 11,a,2011
# 12,b,2012
# 13,c,2013

USING JSON FILES

https://www.geeksforgeeks.org/pandas-parsing-json-dataset/
Pandas | Parsing JSON Dataset


# Serialize a Series or DataFrame to a JSON string.
#
# NaN and None become null; datetime objects become UNIX timestamps.
#
# to_json(path_or_buf=None, orient=None, date_format=None, double_precision=10,
#     force_ascii=True, date_unit='ms', default_handler=None, lines=False,
#     compression=None, index=True)
#
# path_or_buf : file path to write to; when omitted the JSON string is returned
# orient : for Series {'split','records','index'}, default is 'index'
# orient : for DataFrame {'split','records','index','columns','values'},
#     default is 'columns'
# date_format : {None, 'epoch', 'iso'}
# date_unit : string, default 'ms' (milliseconds)
#
# Usage:
#     series1.to_json(file_name)          # write to a file
#     json_string = series1.to_json()     # no file_name -> returns a string

series1 = pd.Series([10, 20, 30], index=['a', 'b', 'c'])

# Print the Series once per supported orient, in this order:
#   'index'   (default, dict like)  {"a":10,"b":20,"c":30}
#   'split'   (dict like)           {"name":null,"index":["a","b","c"],"data":[10,20,30]}
#   'records' (list like)           [10,20,30]
for orient in ('index', 'split', 'records'):
    print(series1.to_json(orient=orient))

# DataFrame.to_json examples: one call per 'orient' value.
# Results are printed because a bare expression is discarded when the notes
# are run as a script.
df = pd.DataFrame([['a', 'b'], ['c', 'd']],
    index=['row1', 'row2'], columns=['col1', 'col2'])

# df.to_json(file_name)
# json_string = df.to_json()   # no file_name

print(df.to_json(orient='columns'))   # default
# {"col1":{"row1":"a","row2":"c"},"col2":{"row1":"b","row2":"d"}}

print(df.to_json(orient='split'))   # dict like
# {"columns":["col1","col2"],"index":["row1","row2"],"data":[["a","b"],["c","d"]]}

print(df.to_json(orient='index'))
# {"row1":{"col1":"a","col2":"b"},"row2":{"col1":"c","col2":"d"}}

print(df.to_json(orient='records'))   # list of row dicts, index labels dropped
# [{"col1":"a","col2":"b"},{"col1":"c","col2":"d"}]

print(df.to_json(orient='values'))   # nested lists, no labels at all
# [["a","b"],["c","d"]]

# Read JSON and make DataFrame (or Series).
#
# pandas.read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
#     convert_axes=True, convert_dates=True, keep_default_dates=True,
#     numpy=False, precise_float=False, date_unit=None, encoding=None,
#     lines=False, chunksize=None, compression='infer')
#
# path_or_buf : a file path, a URL, or a file-like object
# orient : string
# typ : 'frame' or 'series'  (the keyword is 'typ', not 'type')
# numpy : boolean, direct decoding to numpy arrays
# encoding : str, default is 'utf-8', the encoding to use to decode Py3 bytes
#
# df = pd.read_json(file_name)
# df = pd.read_json(StringIO(json_string))
#
# Passing a literal JSON string directly is deprecated since pandas 2.1;
# wrapping it in StringIO works on old and new versions alike.
from io import StringIO

df1 = pd.read_json(StringIO('[10,20,30]'))   # default typ='frame'
#     0
# 0  10
# 1  20
# 2  30

# typ='series' returns a Series instead of a one-column DataFrame.
series1 = pd.read_json(StringIO('[10,20,30]'), typ='series')
# 0    10
# 1    20
# 2    30
# dtype: int64

df2 = pd.read_json(StringIO('[[11,12],[21,22]]'))
#     0   1
# 0  11  12
# 1  21  22

USING EXCEL FILES


# pandas.read_excel(io, sheet_name=0, *, header=0, names=None, index_col=None,
#     usecols=None, dtype=None, engine=None, converters=None, true_values=None,
#     false_values=None, skiprows=None, nrows=None, na_values=None,
#     keep_default_na=True, na_filter=True, verbose=False, parse_dates=False,
#     date_parser=_NoDefault.no_default, date_format=None, thousands=None,
#     decimal='.', comment=None, skipfooter=0, storage_options=None,
#     dtype_backend=_NoDefault.no_default, engine_kwargs=None)
#
# Load a worksheet of an Excel workbook into a pandas DataFrame.
# sheet_name : the worksheet to load, selected here by its name

workbook_path = 'data1.xlsx'
df = pd.read_excel(workbook_path, sheet_name='Sheet1')

# DataFrame.to_excel(excel_writer, sheet_name='Sheet1', na_rep='',
#     float_format=None, columns=None, header=True, index=True, index_label=None,
#     startrow=0, startcol=0, engine=None, merge_cells=True, inf_rep='inf',
#     freeze_panes=None, storage_options=None, engine_kwargs=None)
#
# Store a DataFrame in one sheet of an Excel workbook.

cell_values = [['a', 'b'], ['c', 'd']]
df = pd.DataFrame(
    cell_values,
    columns=['col1', 'col2'],
    index=['row1', 'row2'],
)
# Alternative, keeping the default sheet name 'Sheet1':
#     df.to_excel("output.xlsx")
df.to_excel("output2.xlsx", sheet_name='Sheet_name_1')