python – Splitting one csv into multiple files
python – Splitting one csv into multiple files
In Python
Use readlines() and writelines() to do that; here is an example:
# Read every line of the source CSV into memory. readlines() splits on raw
# line breaks, so this only suits files whose fields contain no embedded
# newlines.
with open('import_1458922827.csv', 'r') as source:
    csvfile = source.readlines()

# Write each run of 1000 lines to its own file: 1.csv, 2.csv, ...
filename = 1
for i in range(0, len(csvfile), 1000):
    # The with-block closes (and flushes) each output file before the next
    # one is opened.
    with open(str(filename) + '.csv', 'w+') as chunk:
        chunk.writelines(csvfile[i:i + 1000])
    filename += 1
The output files will be named 1.csv, 2.csv, … etc.
From terminal
FYI, you can do this from the command line using split
as follows:
$ split -l 1000 import_1458922827.csv
I suggest you not reinvent the wheel. There is an existing solution. Source here
import os
def split(filehandler, delimiter=',', row_limit=1000,
          output_name_template='output_%s.csv', output_path='.', keep_headers=True):
    """Split the CSV read from *filehandler* into numbered pieces.

    Each piece holds at most ``row_limit`` data rows and is written to
    ``os.path.join(output_path, output_name_template % piece_number)``,
    with piece numbers starting at 1. The first piece is always created,
    even when the input has no rows.

    Args:
        filehandler: Open text file (or other iterable of lines) to read.
        delimiter: Field delimiter used for both reading and writing.
        row_limit: Maximum number of data rows per output piece.
        output_name_template: ``%``-format template that receives the
            piece number.
        output_path: Directory the pieces are written to.
        keep_headers: When true, the first input row is treated as a header
            and repeated at the top of every piece.
    """
    import csv

    def open_piece(piece):
        # newline='' lets the csv writer control line endings itself.
        path = os.path.join(output_path, output_name_template % piece)
        handle = open(path, 'w', newline='')
        return handle, csv.writer(handle, delimiter=delimiter)

    reader = csv.reader(filehandler, delimiter=delimiter)
    current_piece = 1
    current_out_file, current_out_writer = open_piece(current_piece)
    try:
        headers = None
        if keep_headers:
            # next(reader, None) instead of the Python 2 only reader.next();
            # an empty input simply yields no header.
            headers = next(reader, None)
            if headers is not None:
                current_out_writer.writerow(headers)
        for i, row in enumerate(reader):
            # Row index i belongs to the next piece once the running total
            # reaches row_limit * current_piece.
            if i >= row_limit * current_piece:
                current_out_file.close()
                current_piece += 1
                current_out_file, current_out_writer = open_piece(current_piece)
                if headers is not None:
                    current_out_writer.writerow(headers)
            current_out_writer.writerow(row)
    finally:
        # Always close the last piece so buffered rows reach the disk.
        current_out_file.close()
Use it like:
# newline='' is what the csv module expects for files it reads; the
# with-block closes the input once splitting is done.
with open('/your/pat/input.csv', 'r', newline='') as source:
    split(source)
python – Splitting one csv into multiple files
A python3-friendly solution:
def split_csv(source_filepath, dest_folder, split_file_prefix,
              records_per_file):
    """Split a source csv into multiple csvs of equal numbers of records,
    except the last file.

    Includes the initial header row in each split file.

    Split files follow a zero-index sequential naming convention like so:

        `{split_file_prefix}_0.csv`

    Args:
        source_filepath: Path of the csv file to split.
        dest_folder: Existing directory the split files are written to.
        split_file_prefix: Filename prefix for every split file.
        records_per_file: Maximum number of data rows per split file.

    Raises:
        ValueError: If ``records_per_file`` is not greater than zero.

    A source with no rows at all, or with only a header row, produces no
    output files.
    """
    import csv
    import itertools
    import os

    if records_per_file <= 0:
        raise ValueError('records_per_file must be > 0')

    # newline='' on both sides lets the csv module handle line endings and
    # quoted fields that contain line breaks.
    with open(source_filepath, 'r', newline='') as source:
        reader = csv.reader(source)
        headers = next(reader, None)
        if headers is None:
            # Completely empty source: nothing to split.
            return

        for file_idx in itertools.count():
            # Peek at the next record first, so a file is created only when
            # there is at least one record to put in it.
            first_record = next(reader, None)
            if first_record is None:
                break

            target_filename = f'{split_file_prefix}_{file_idx}.csv'
            target_filepath = os.path.join(dest_folder, target_filename)

            with open(target_filepath, 'w', newline='') as target:
                writer = csv.writer(target)
                writer.writerow(headers)
                writer.writerow(first_record)
                # Stream the rest of this file's quota straight from the reader.
                writer.writerows(
                    itertools.islice(reader, records_per_file - 1)
                )