https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_csv.html
https://pandas.pydata.org/docs/reference/api/pandas.read_json.html
https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_json.html
https://pandas.pydata.org/docs/reference/api/pandas.Series.to_json.html
https://pandas.pydata.org/docs/reference/api/pandas.read_excel.html
https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_excel.html
Pandas integrates with many file formats and data sources out of the box (csv, excel, sql, json, parquet, ...). Importing data from each of these sources is done with a function whose name starts with 'read_' (for example, 'read_csv'). Similarly, the 'to_*' methods (for example, 'to_csv') are used to store data.
# Example: read a CSV file into a DataFrame with pandas.read_csv.
#
# NOTE(review): the signature below mirrors an older pandas release; some of
# these parameters may no longer exist in current versions -- confirm against
# the pandas.read_csv reference before relying on any of them.
#
# pandas.read_csv(filepath_or_buffer, sep=',', delimiter=None, header='infer',
#     names=None, index_col=None, usecols=None, squeeze=False, prefix=None,
#     mangle_dupe_cols=True, dtype=None, engine=None, converters=None,
#     true_values=None, false_values=None, skipinitialspace=False, skiprows=None,
#     nrows=None, na_values=None, keep_default_na=True, na_filter=True,
#     verbose=False, skip_blank_lines=True, parse_dates=False,
#     infer_datetime_format=False, keep_date_col=False, date_parser=None,
#     dayfirst=False, iterator=False, chunksize=None, compression='infer',
#     thousands=None, decimal=b'.', lineterminator=None, quotechar='"',
#     quoting=0, escapechar=None, comment=None, encoding=None, dialect=None,
#     tupleize_cols=None, error_bad_lines=True, warn_bad_lines=True, skipfooter=0,
#     doublequote=True, delim_whitespace=False, low_memory=True, memory_map=False,
#     float_precision=None)
#
# Read CSV and make DataFrame.

# The examples in this file refer to pandas as `pd`.
import pandas as pd

# File 'data1.csv'
# 11,a,2011
# 12,b,2012
# 13,c,2013

# header=None: the file has no header row, so columns are numbered 0, 1, 2.
df = pd.read_csv('data1.csv', sep=",", encoding="utf-8", header=None)
#     0  1     2
# 0  11  a  2011
# 1  12  b  2012
# 2  13  c  2013

# In a notebook the value of the last expression is displayed; in a plain
# script wrap these calls in print() to see their output.
df.describe()
df.head()  # display the first few records
# Example: write a DataFrame to a CSV file with DataFrame.to_csv.
#
# NOTE(review): the signature below mirrors an older pandas release; confirm
# parameter names against the DataFrame.to_csv reference.
#
# pandas.DataFrame.to_csv(self, path_or_buf=None, sep=',', na_rep='',
#     float_format=None, columns=None, header=True, index=True, index_label=None,
#     mode='w', encoding=None, compression=None, quoting=None, quotechar='"',
#     line_terminator='\n', chunksize=None, tupleize_cols=None, date_format=None,
#     doublequote=True, escapechar=None, decimal='.')
#
# Write DataFrame to CSV.
# encoding : "utf-8", "latin1", "iso-8859-1", "cp1252"
# index    : write row names (bool)

df = pd.DataFrame({
    'pos': [11, 12, 13],
    'char': ['a', 'b', 'c'],
    'year': [2011, 2012, 2013],
})
#    pos char  year
# 0   11    a  2011
# 1   12    b  2012
# 2   13    c  2013

# Defaults (header=True, index=True): column names and row labels are written.
df.to_csv('data2.csv', encoding="utf-8")
# ,pos,char,year
# 0,11,a,2011
# 1,12,b,2012
# 2,13,c,2013

# Same target file, so this overwrites the output above (mode='w');
# header=False / index=False leave only the data rows.
df.to_csv("data2.csv", encoding="utf-8", header=False, index=False)
# 11,a,2011
# 12,b,2012
# 13,c,2013
https://www.geeksforgeeks.org/pandas-parsing-json-dataset/
Pandas | Parsing JSON Dataset
# Example: convert a Series to a JSON string with Series.to_json.
#
# Note NaN's and None will be converted to null and datetime objects
# will be converted to UNIX timestamps.
#
# to_json(path_or_buf=None, orient=None, date_format=None, double_precision=10,
#     force_ascii=True, date_unit='ms', default_handler=None, lines=False,
#     compression=None, index=True)
#
# path_or_buf : file path to write to; when omitted the JSON is returned
#               as a string
# orient      : for Series {'split','records','index'}, default is 'index'
# orient      : for DataFrame {'split','records','index','columns','values'},
#               default is 'columns'
# date_format : {None, 'epoch', 'iso'}
# date_unit   : string, default 'ms' (milliseconds)

series1 = pd.Series({'a': 10, 'b': 20, 'c': 30})

# series1.to_json(file_name)
# json_string = series1.to_json()   # no file_name

print(series1.to_json(orient='index'))    # default, dict like
# {"a":10,"b":20,"c":30}

print(series1.to_json(orient='split'))    # dict like
# {"name":null,"index":["a","b","c"],"data":[10,20,30]}

print(series1.to_json(orient='records'))  # list like
# [10,20,30]
# Example: convert a DataFrame to a JSON string with DataFrame.to_json,
# showing how each `orient` value lays out the same 2x2 frame.
df = pd.DataFrame(
    [['a', 'b'], ['c', 'd']],
    index=['row1', 'row2'],
    columns=['col1', 'col2'],
)

# df.to_json(file_name)
# json_string = df.to_json()   # no file_name

# Each call returns a string; in a plain script wrap it in print() to see it.
df.to_json(orient='columns')   # default
# {"col1":{"row1":"a","row2":"c"},"col2":{"row1":"b","row2":"d"}}

df.to_json(orient='split')     # dict like
# {"columns":["col1","col2"],"index":["row1","row2"],"data":[["a","b"],["c","d"]]}

df.to_json(orient='index')
# {"row1":{"col1":"a","col2":"b"},"row2":{"col1":"c","col2":"d"}}

df.to_json(orient='values')
# [["a","b"],["c","d"]]
# Example: parse JSON into a DataFrame or Series with pandas.read_json.
#
# NOTE(review): the signature below mirrors an older pandas release; confirm
# parameter names against the pandas.read_json reference.
#
# pandas.read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
#     convert_axes=True, convert_dates=True, keep_default_dates=True,
#     numpy=False, precise_float=False, date_unit=None, encoding=None,
#     lines=False, chunksize=None, compression='infer')
#
# path_or_buf : a file path, a URL, or a file-like object; literal JSON text
#               should be wrapped in io.StringIO (passing a raw JSON string
#               directly is deprecated in recent pandas releases)
# orient      : string
# typ         : 'frame' or 'series' (the keyword is `typ`, not `type`)
# numpy       : boolean, direct decoding to numpy arrays
# encoding    : str, default is 'utf-8', the encoding to use to decode Py3 bytes

# Local import so this example is self-contained.
from io import StringIO

# df = pd.read_json(file_name)
# df = pd.read_json(StringIO(json_string))

df1 = pd.read_json(StringIO('[10,20,30]'))  # default typ='frame'
#     0
# 0  10
# 1  20
# 2  30

# `typ='series'` selects a Series result; `type=` is not an accepted keyword
# and raises TypeError.
series1 = pd.read_json(StringIO('[10,20,30]'), typ='series')
# 0    10
# 1    20
# 2    30
# dtype: int64

df2 = pd.read_json(StringIO('[[11,12],[21,22]]'))
#     0   1
# 0  11  12
# 1  21  22
# Example: read one worksheet of an Excel workbook with pandas.read_excel.
#
# pandas.read_excel(io, sheet_name=0, *, header=0, names=None, index_col=None,
#     usecols=None, dtype=None, engine=None, converters=None, true_values=None,
#     false_values=None, skiprows=None, nrows=None, na_values=None,
#     keep_default_na=True, na_filter=True, verbose=False, parse_dates=False,
#     date_parser=_NoDefault.no_default, date_format=None, thousands=None,
#     decimal='.', comment=None, skipfooter=0, storage_options=None,
#     dtype_backend=_NoDefault.no_default, engine_kwargs=None)
#
# Read an Excel file into a pandas DataFrame.

# sheet_name selects the worksheet by name here; the default (0) is the
# first sheet by position.
df = pd.read_excel('data1.xlsx', sheet_name='Sheet1')
# Example: write a DataFrame to an Excel workbook with DataFrame.to_excel.
#
# DataFrame.to_excel(excel_writer, sheet_name='Sheet1', na_rep='',
#     float_format=None, columns=None, header=True, index=True, index_label=None,
#     startrow=0, startcol=0, engine=None, merge_cells=True, inf_rep='inf',
#     freeze_panes=None, storage_options=None, engine_kwargs=None)
#
# Write object to an Excel sheet.

df = pd.DataFrame(
    [['a', 'b'], ['c', 'd']],
    index=['row1', 'row2'],
    columns=['col1', 'col2'],
)

# Alternative using the default sheet name ('Sheet1'):
# df.to_excel("output.xlsx")

# Explicit sheet name for the single worksheet that is written.
df.to_excel("output2.xlsx", sheet_name='Sheet_name_1')