January 4, 2023 (September 12, 2023)
Polars
#
Read
#
import polars as pl
# Eager evaluation: the whole file is parsed into a DataFrame immediately.
data_df = pl.read_ndjson("file.jsonl")
print(data_df.describe())

# Lazy evaluation: scan_ndjson() only builds a query plan (a LazyFrame).
data_df = pl.scan_ndjson("file.jsonl")
# A LazyFrame must be materialized before describe() can be called.
# Use collect() for this: fetch() is a debugging helper that only loads a
# limited number of rows, so the statistics would not cover the full file.
data_df = data_df.collect()
print(data_df.describe())
Write
#
import polars as pl
# Example input: a list of records, one dict per output line.
data_list = [
    {"name": "alice", "age": "18"},
    {"name": "bob", "age": "17"},
]

# Build a DataFrame from the records, then serialize it as newline-delimited JSON.
data_df = pl.DataFrame(data_list)
data_df.write_ndjson("file.jsonl")
Pandas
#
Read
#
import pandas as pd
# Parse the JSON Lines file: lines=True treats every line as one JSON record.
data_df = pd.read_json("file.jsonl", orient="records", lines=True)
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() would emit a stray "None" after the summary.
data_df.info()
Write
#
import pandas as pd
# Example input: a list of records, one dict per output line.
data_list = [
    {"name": "alice", "age": "18"},
    {"name": "bob", "age": "17"},
]

# Build a DataFrame from the records.
data_df = pd.DataFrame(data_list)

# orient="records" with lines=True writes one JSON object per line;
# force_ascii=False keeps non-ASCII characters unescaped in the output.
data_df.to_json(
    "file.jsonl",
    orient="records",
    lines=True,
    force_ascii=False,
)
No library
#
Read
#
import json
# Parse a JSON Lines file into a list holding one decoded object per line.
data_list = []
# Open explicitly as UTF-8 instead of relying on the platform default encoding.
with open("file.jsonl", "r", encoding="utf-8") as f:
    # Iterating the file object yields one line at a time until end of file.
    for line in f:
        # Skip blank lines (e.g. a trailing empty line), which json.loads rejects.
        if not line.strip():
            continue
        data_list.append(json.loads(line))
print(data_list)
Write
#
import json
# Example input: a list of records, one dict per output line.
data_list = [
    {"name": "alice", "age": "18"},
    {"name": "bob", "age": "17"},
]

# ensure_ascii=False writes non-ASCII characters unescaped, so the file must be
# opened with an explicit UTF-8 encoding; the platform default encoding may be
# unable to represent them.
with open("file.jsonl", mode="w", encoding="utf-8") as f:
    for record in data_list:
        # One JSON document per line, each terminated by a newline.
        json.dump(record, f, ensure_ascii=False)
        f.write("\n")