How to handle jsonl in Python

How to handle jsonl in Python

January 4, 2023 (September 12, 2023)
python, jsonl

Polars #

Read #

import polars as pl

# Eager evaluation: the whole file is parsed into a DataFrame immediately.
data_df = pl.read_ndjson("file.jsonl")

print(data_df.describe())

# Lazy evaluation: scan_ndjson only builds a query plan (a LazyFrame);
# nothing is read until the plan is executed.
data_df = pl.scan_ndjson("file.jsonl")

# The LazyFrame must be materialized before describe() can be called.
# collect() runs the query over the whole file; fetch() is a debugging helper
# that only processes a limited number of rows, so statistics computed from
# it would not cover the full data set.
data_df = data_df.collect()
print(data_df.describe())

Write #

import polars as pl

# Sample records (list[dict]); each dict becomes one row of the frame.
data_list = [
    {"name": "alice", "age": "18"},
    {"name": "bob", "age": "17"},
]

# Build the frame from the records, then serialize it as newline-delimited
# JSON (one JSON object per row).
data_df = pl.DataFrame(data=data_list)
data_df.write_ndjson("file.jsonl")

Pandas #

Read #

import pandas as pd

# Parse the file as JSON Lines: lines=True reads one JSON object per line,
# and each object becomes one row of the DataFrame.
data_df = pd.read_json("file.jsonl", orient="records", lines=True)

# DataFrame.info() prints its summary itself and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None".
data_df.info()

Write #

import pandas as pd

# Sample records (list[dict]); every dict is one row and one output line.
data_list = [
    {"name": "alice", "age": "18"},
    {"name": "bob", "age": "17"},
]

data_df = pd.DataFrame(data=data_list)

# orient="records" with lines=True writes one JSON object per line;
# force_ascii=False keeps non-ASCII characters unescaped in the output.
data_df.to_json(
    path_or_buf="file.jsonl",
    orient="records",
    lines=True,
    force_ascii=False,
)

No library #

Read #

import json

# JSON Lines files are UTF-8, so the encoding is stated explicitly instead of
# relying on the platform default encoding.
with open("file.jsonl", "r", encoding="utf-8") as f:
    # Iterating the file object yields one line at a time. Blank lines (for
    # example a trailing empty line) are skipped because json.loads() would
    # raise on them.
    data_list = [json.loads(line) for line in f if line.strip()]

print(data_list)

Write #

import json

# sample data list[dict]
data_list = [{"name": "alice", "age": "18"},
             {"name": "bob", "age": "17"}]

# ensure_ascii=False emits non-ASCII characters verbatim, so the file is
# opened as UTF-8 explicitly; the platform default encoding may be unable to
# represent them (and JSON Lines files are expected to be UTF-8).
with open("file.jsonl", mode="w", encoding="utf-8") as f:
    for record in data_list:
        # One JSON object per line, each terminated by a newline.
        json.dump(record, f, ensure_ascii=False)
        f.write("\n")