csv splitter
This commit is contained in:
parent
681685c356
commit
8b65a7c03d
167
12_csv_split.py
Normal file
167
12_csv_split.py
Normal file
@ -0,0 +1,167 @@
|
||||
### WIP
|
||||
|
||||
import sys
|
||||
import os
|
||||
import getopt
|
||||
import csv
|
||||
|
||||
"""
|
||||
Splits a CSV file into multiple pieces based on command line arguments.
|
||||
|
||||
Arguments:
|
||||
`-h`: help file of usage of the script
|
||||
`-i`: input file name
|
||||
`-o`: output file, A %s-style template for the numbered output files.
|
||||
`-r`: row limit to split
|
||||
`-c`: column index (0-based); split into one file per unique value found in that column
|
||||
|
||||
Default settings:
|
||||
`output_path` is the current directory
|
||||
`keep_headers` is on (headers will be kept)
|
||||
`delimiter` is `,`
|
||||
|
||||
Example usage:
|
||||
# split by every 10000 rows
|
||||
>> python 12_csv_split.py -i input.csv -o rownumber -r 10000
|
||||
# split by unique items in column 0
|
||||
>> python 12_csv_split.py -i input.csv -o userid -c 0
|
||||
# access help
|
||||
>> python 12_csv_split.py -h for help
|
||||
|
||||
"""
|
||||
|
||||
def main(argv):
    """Run the splitter: turn *argv* into settings, then split the file.

    Args:
        argv: Command-line argument strings without the program name.
    """
    parse_file(grab_command_line_arguments(argv))
|
||||
|
||||
|
||||
def grab_command_line_arguments(argv):
    """Parse the command-line options into a dictionary of split settings.

    Args:
        argv: Argument strings without the program name (``sys.argv[1:]``).

    Returns:
        A dict holding ``"input_file"`` and ``"output_file"`` plus exactly one
        of ``"rowlimit"`` or ``"columnindex"``. Every value is the raw string
        taken from the command line; no type conversion is done here.

    Raises:
        SystemExit: With status 0 after printing the usage line for ``-h``.
            With status 2 after printing a message when no options are given,
            an option is unknown or lacks its value, the input or output file
            is missing, or the row limit and column index are both given or
            both absent.
    """
    usage = 'csvsplit.py -i <inputfile> -r <row limit> -c <column index> -o <outputfile>'

    inputfile = ''
    outputfile = ''
    rowlimit = ''
    columnindex = ''
    argument_dict = {}

    # An unknown option (or one missing its value) should produce a readable
    # message rather than a traceback.
    try:
        opts, _ = getopt.getopt(
            argv,
            "hi:o:r:c:",
            ["ifile=", "ofile=", "rowlimit=", "columnindex="],
        )
    except getopt.GetoptError as err:
        print(str(err))
        print(usage)
        sys.exit(2)

    # End early if no options were provided (bare positional words only).
    if not opts:
        print("No options provided. Try again. Use `-h` for help.")
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-i", "--ifile"):
            inputfile = arg
        elif opt in ("-o", "--ofile"):
            outputfile = arg
        elif opt in ("-r", "--rowlimit"):
            rowlimit = arg
        elif opt in ("-c", "--columnindex"):
            columnindex = arg

    # Echo what was understood, reporting every missing file before exiting
    # so the user sees all problems in one run.
    print("\nArguments:")
    missing_file = False
    if inputfile:
        argument_dict["input_file"] = inputfile
        print("Input file is '{}'".format(inputfile))
    else:
        print("Please enter an input file.")
        missing_file = True
    if outputfile:
        argument_dict["output_file"] = outputfile
        print("Output file is '{}'".format(outputfile))
    else:
        print("Please enter an output file.")
        missing_file = True
    if rowlimit:
        argument_dict["rowlimit"] = rowlimit
        print("Rowlimit is '{}'".format(rowlimit))
    if columnindex:
        argument_dict["columnindex"] = columnindex
        print("Columnindex is '{}'".format(columnindex))

    # Exactly one split mode must be selected.
    if rowlimit and columnindex:
        print("Please use either a rowlimit or columnlimit, not both.")
        sys.exit(2)
    # Parenthesised on purpose: `not rowlimit or columnindex` would also
    # reject a lone column index.
    if not (rowlimit or columnindex):
        print("Please enter either a rowlimit or columnlimit.")
        sys.exit(2)
    if missing_file:
        sys.exit(2)

    print(argument_dict)
    return argument_dict
|
||||
|
||||
|
||||
def parse_file(argument_dict):
    """Split the input CSV as described by the parsed command-line settings.

    Output files are created under the current directory (an absolute
    ``output_file`` overrides that, per ``os.path.join``) and are named
    ``<output_file>_<suffix>.csv``. The first input row is treated as a
    header and repeated at the top of every output file.

    Args:
        argument_dict: Settings dict with ``"input_file"`` and
            ``"output_file"``, plus either ``"rowlimit"`` (rows per piece;
            suffix is the 1-based piece number) or ``"columnindex"``
            (0-based column; suffix is the value found in that column).
            Numeric settings may be strings; they are converted with ``int``.

    Raises:
        KeyError: If ``"input_file"`` or ``"output_file"`` is missing.
        ValueError: If the row limit / column index is not an integer, or
            the row limit is smaller than 1.
        IndexError: In column mode, if a non-empty row has no such column.
    """
    input_file = argument_dict["input_file"]
    output_prefix = os.path.join('.', argument_dict["output_file"])
    keep_headers = True
    delimiter = ','

    # `.get` so that a settings dict carrying only one of the two modes
    # does not fail on the absent key.
    rowlimit = argument_dict.get("rowlimit")
    columnindex = argument_dict.get("columnindex")

    if rowlimit:
        _split_by_row_limit(
            input_file, output_prefix, int(rowlimit), keep_headers, delimiter)
    elif columnindex:
        _split_by_column(
            input_file, output_prefix, int(columnindex), keep_headers, delimiter)
    else:
        print("oops, please check instruction of script by >>./csvsplit.py -h")


def _split_by_row_limit(input_file, output_prefix, rowlimit, keep_headers, delimiter):
    """Write consecutive runs of at most *rowlimit* data rows to numbered files."""
    if rowlimit < 1:
        raise ValueError("rowlimit must be at least 1, got {}".format(rowlimit))

    piece_handle = None
    piece_writer = None
    piece_number = 0
    with open(input_file, 'r') as source:
        reader = csv.reader(source, delimiter=delimiter)
        # `next(reader, None)` tolerates a completely empty input file.
        headers = next(reader, None) if keep_headers else None
        try:
            for row_number, row in enumerate(reader):
                # Start a new piece on every multiple of the row limit;
                # pieces are opened lazily so no empty trailing file appears.
                if row_number % rowlimit == 0:
                    if piece_handle is not None:
                        piece_handle.close()
                    piece_number += 1
                    piece_path = "{}_{}.csv".format(output_prefix, piece_number)
                    piece_handle = open(piece_path, 'w')
                    piece_writer = csv.writer(piece_handle, delimiter=delimiter)
                    if headers is not None:
                        piece_writer.writerow(headers)
                piece_writer.writerow(row)
        finally:
            if piece_handle is not None:
                piece_handle.close()


def _split_by_column(input_file, output_prefix, columnindex, keep_headers, delimiter):
    """Write each data row to the file named after its value in *columnindex*."""
    seen_values = set()
    with open(input_file, 'r') as source:
        reader = csv.reader(source, delimiter=delimiter)
        headers = next(reader, None) if keep_headers else None
        for row in reader:
            if not row:
                # csv yields [] for blank lines; they carry no column value.
                continue
            value = row[columnindex]
            first_time = value not in seen_values
            target_path = "{}_{}.csv".format(output_prefix, value)
            # Truncate on the first sighting, append afterwards. Re-opening
            # per row keeps the number of simultaneously open files at one.
            with open(target_path, 'w' if first_time else 'a') as target:
                writer = csv.writer(target, delimiter=delimiter)
                if first_time:
                    seen_values.add(value)
                    if headers is not None:
                        writer.writerow(headers)
                writer.writerow(row)
    print('totally %i unique items in column %i \n' % (len(seen_values), columnindex))
|
||||
|
||||
|
||||
# Run the splitter only when this file is executed as a script, so that
# importing it has no side effects.
if __name__ == "__main__":
    cli_arguments = sys.argv[1:]  # drop the program name
    main(cli_arguments)
|
101
12_sample_csv.csv
Normal file
101
12_sample_csv.csv
Normal file
@ -0,0 +1,101 @@
|
||||
First Name,Last Name,Email Address,Phone Number,Company,Date Hired
|
||||
Abigail,Branch,volutpat.ornare.facilisis@Phasellusvitaemauris.co.uk,(412) 540-6276,Sem Eget PC,07/02/2013
|
||||
Roanna,Lambert,tristique.pharetra@arcuvelquam.ca,(747) 536-6748,Eget Laoreet Foundation,11/23/2013
|
||||
Amanda,England,semper.rutrum@blandit.com,(669) 164-6411,Magna Nec Quam Limited,08/11/2012
|
||||
Hilel,Chapman,ultrices@tempor.ca,(683) 531-0279,Sed Molestie PC,06/25/2012
|
||||
Basia,Bowers,Quisque.ornare@tinciduntnibh.com,(135) 986-6437,Tincidunt Nunc Ac Associates,05/11/2013
|
||||
Dylan,Dunlap,est.Mauris@etnetuset.org,(877) 604-4603,Eu Ultrices Institute,07/02/2012
|
||||
Regan,Cardenas,vitae.semper@ultriciesornareelit.org,(693) 378-7235,Neque Morbi Corporation,10/30/2012
|
||||
Sade,Green,tortor@sagittis.co.uk,(816) 255-5508,Eleifend Ltd,09/03/2012
|
||||
Marshall,Richardson,sed.facilisis@eu.com,(460) 132-4621,Purus Maecenas Libero LLC,12/21/2012
|
||||
Regina,Brown,semper.auctor@sem.co.uk,(185) 963-9365,Vulputate Consulting,06/16/2013
|
||||
Irma,Rivers,vitae@luctusvulputate.net,(701) 393-3679,Nec Leo Morbi Incorporated,05/07/2013
|
||||
Rudyard,Cline,fringilla@risusatfringilla.org,(971) 228-3147,Risus Quis Consulting,04/25/2013
|
||||
Justina,Richmond,sapien.Nunc.pulvinar@vitaeerat.co.uk,(755) 103-3125,Ullamcorper Associates,02/12/2013
|
||||
Reece,Blackburn,felis@Aliquamauctor.com,(239) 528-2742,Suspendisse Associates,04/03/2014
|
||||
Lillith,Holden,ut.dolor.dapibus@porttitor.net,(305) 797-1579,Dapibus Id Blandit LLP,09/11/2013
|
||||
Taylor,Vinson,ac@vellectusCum.net,(355) 993-1099,Egestas Institute,05/16/2012
|
||||
Colton,Barker,volutpat@necluctus.ca,(705) 978-5992,Ornare Consulting,04/24/2013
|
||||
Vladimir,Walls,mollis.lectus@imperdietullamcorperDuis.edu,(311) 406-4856,Faucibus Ut Nulla LLP,08/12/2012
|
||||
Freya,Rowland,sagittis@elementumduiquis.co.uk,(284) 850-7506,Turpis PC,05/31/2013
|
||||
Cullen,Phelps,Nam.ligula@orciluctus.ca,(425) 280-1763,Rhoncus Id Mollis Consulting,09/10/2013
|
||||
Boris,Lopez,posuere@adipiscingligula.edu,(769) 701-0055,Nunc Sed Orci Industries,07/26/2013
|
||||
Alvin,Meyer,Etiam@felis.ca,(783) 312-0821,Dignissim Pharetra Ltd,03/02/2013
|
||||
Nicole,Boyle,tortor.Integer@imperdiet.edu,(675) 678-1160,Dictum Eleifend Nunc LLC,05/05/2012
|
||||
Flynn,Petersen,dui@lectusrutrum.com,(787) 543-7411,Penatibus Et Associates,03/11/2013
|
||||
Troy,Herman,a.felis.ullamcorper@sem.ca,(932) 900-7922,Dolor Donec Associates,11/16/2012
|
||||
Constance,Shields,nec.leo.Morbi@eunulla.com,(221) 761-2368,Vel Quam Company,02/14/2014
|
||||
Ocean,Green,vulputate.dui@bibendumDonecfelis.net,(481) 832-0298,Nunc Associates,03/03/2013
|
||||
Steven,Lopez,Suspendisse.ac@sedpedeCum.net,(294) 415-0435,Ipsum Company,07/25/2013
|
||||
Adara,Lee,magna.Duis@erat.org,(760) 291-7826,Eu Ultrices PC,10/05/2013
|
||||
Noble,Hancock,Donec.tincidunt.Donec@dictumcursusNunc.edu,(333) 272-8234,Vitae Risus Duis LLC,09/13/2012
|
||||
Kendall,Wilcox,quis.pede@Pellentesqueut.ca,(173) 982-4381,Ultrices Industries,01/26/2013
|
||||
Sebastian,Barton,orci.Ut@ametfaucibus.ca,(951) 817-9217,In Mi Pede Corporation,05/11/2014
|
||||
Gavin,Clark,metus.facilisis.lorem@Sedetlibero.ca,(671) 714-8378,Vestibulum Neque Limited,06/06/2012
|
||||
Charles,Woods,Maecenas.mi.felis@lacusvarius.org,(559) 935-9739,Amet Ante Company,09/02/2013
|
||||
Elvis,Roberts,tempor.diam@risus.co.uk,(184) 182-5324,Facilisis Vitae Inc.,01/07/2014
|
||||
Caldwell,Carey,Suspendisse@Proin.edu,(125) 243-9354,Egestas Lacinia Sed Inc.,10/24/2012
|
||||
Jesse,Leblanc,sit@tellussemmollis.com,(726) 216-8000,Lectus Ltd,11/22/2013
|
||||
Hu,Adkins,purus.in.molestie@acmattisvelit.co.uk,(370) 317-7556,Aliquam Vulputate Company,10/19/2013
|
||||
Hamilton,Tyler,taciti.sociosqu.ad@Sedmalesuadaaugue.com,(234) 744-3868,Nunc Sed LLC,10/19/2012
|
||||
Cade,Osborn,at.iaculis.quis@doloregestas.org,(501) 753-9793,Consectetuer Industries,08/14/2013
|
||||
Ashely,Kent,Cum.sociis.natoque@odioPhasellusat.edu,(789) 869-6558,Imperdiet Ornare Corporation,02/04/2013
|
||||
Veda,Cameron,tristique.pharetra@necenimNunc.co.uk,(522) 127-0654,Egestas Incorporated,12/29/2012
|
||||
Burke,Ferrell,orci.sem@semPellentesque.co.uk,(975) 891-3694,Purus Accumsan Institute,07/26/2013
|
||||
Fuller,Lamb,orci.Donec@vulputatedui.edu,(523) 614-5785,Pede Cum Sociis Limited,12/02/2013
|
||||
Natalie,Taylor,In@lorem.ca,(117) 594-2685,A Facilisis Non LLP,12/06/2013
|
||||
Astra,Morton,nec@scelerisquenequeNullam.com,(390) 867-2558,Non Ante Bibendum Foundation,05/07/2012
|
||||
David,Espinoza,gravida@a.co.uk,(287) 945-5239,Lobortis Nisi Nibh Industries,05/11/2014
|
||||
Sybil,Todd,risus@sitametrisus.edu,(611) 848-4765,Massa Mauris Vestibulum Incorporated,01/19/2013
|
||||
Lee,Barron,cursus.non@Praesentinterdumligula.ca,(765) 654-9167,In Ornare Inc.,01/01/2013
|
||||
Zachery,Reed,nulla.Integer.urna@amet.edu,(667) 465-1222,Ac Corp.,10/07/2012
|
||||
Marshall,Brady,lobortis.nisi.nibh@molestiearcu.edu,(391) 336-5310,Ac Sem Ut Incorporated,07/12/2012
|
||||
Selma,Floyd,eros.turpis.non@lectusconvallis.net,(398) 920-1076,Non Foundation,07/21/2012
|
||||
Ivy,Garrison,posuere@euodio.net,(428) 321-5542,Semper Erat Foundation,12/19/2013
|
||||
Wyatt,Gibbs,Sed@nequeNullamut.ca,(973) 141-9840,Pellentesque Corp.,11/21/2013
|
||||
Vaughan,Moss,adipiscing@Phasellusfermentum.net,(597) 730-0228,Tempor Institute,10/27/2013
|
||||
Elijah,Mcgowan,Aliquam@Quisqueornaretortor.ca,(127) 171-1859,Tempor Bibendum Donec LLC,08/26/2012
|
||||
Miranda,Ingram,fermentum@velitSedmalesuada.net,(864) 873-7359,Feugiat Non Lobortis Institute,08/20/2012
|
||||
Anastasia,Lawrence,Mauris.eu@pedeultrices.net,(106) 260-8688,Sit Amet Consulting,05/31/2012
|
||||
Samson,Patton,non.arcu@enimnislelementum.ca,(302) 330-4251,Hendrerit Associates,12/27/2013
|
||||
Erasmus,Sexton,lectus.justo@aliquam.org,(972) 793-9187,Feugiat Industries,10/15/2013
|
||||
Emery,Gardner,erat@lorem.org,(848) 534-1656,Nunc Sit Amet Industries,08/24/2012
|
||||
Nomlanga,Hensley,Fusce@leoVivamus.org,(644) 169-6243,Consectetuer Company,08/29/2012
|
||||
Jason,Craft,nunc.nulla@sapien.ca,(691) 770-9143,Blandit LLC,03/23/2013
|
||||
Kathleen,Haley,sed.dolor.Fusce@imperdietornare.edu,(891) 454-8400,Lorem Company,07/02/2012
|
||||
Aline,Flynn,a@Nunclaoreet.edu,(563) 400-6803,Et Netus LLP,01/28/2013
|
||||
Ursa,Dickson,Integer.sem@ullamcorpervelit.com,(371) 615-7750,Nullam Company,12/22/2012
|
||||
Wesley,Lopez,enim.non.nisi@vulputateduinec.edu,(287) 777-3724,Lobortis Ultrices Vivamus Corp.,06/17/2013
|
||||
Victoria,Mcleod,lectus.justo.eu@ut.ca,(583) 108-1294,Justo Faucibus Lectus Corporation,10/17/2012
|
||||
Shana,Roach,scelerisque.sed.sapien@afelisullamcorper.edu,(921) 385-2342,Quis Turpis Vitae Incorporated,05/26/2014
|
||||
Maxine,Ruiz,Donec.porttitor@hymenaeosMaurisut.edu,(520) 801-0808,Luctus Foundation,12/05/2013
|
||||
Harriet,Bishop,Quisque@Crasdictum.com,(758) 716-9401,Dictum Phasellus In Inc.,09/08/2013
|
||||
Serina,Williams,tincidunt.vehicula.risus@sedliberoProin.ca,(270) 288-0136,At Egestas A Corporation,03/17/2014
|
||||
Rhea,Copeland,laoreet.ipsum@Aliquam.co.uk,(775) 493-9118,Ipsum Incorporated,05/22/2013
|
||||
Evan,Holcomb,neque.sed@ullamcorperDuis.ca,(695) 656-8621,Sem Institute,02/16/2013
|
||||
Basil,Mccall,arcu.Vestibulum.ante@luctuslobortis.co.uk,(144) 989-4125,Feugiat Tellus Lorem Institute,02/25/2013
|
||||
Florence,Riley,sit.amet@Proinvel.org,(663) 529-4829,Enim Sit PC,01/14/2014
|
||||
Heather,Peck,mauris@scelerisqueneque.edu,(850) 444-0917,Curabitur Limited,01/16/2014
|
||||
Dara,Robinson,egestas@utnisi.net,(106) 576-1355,Urna Incorporated,12/15/2012
|
||||
Kylan,Maxwell,conubia.nostra@accumsan.com,(973) 206-2558,Aliquam Eros Turpis Company,08/21/2012
|
||||
Petra,Blake,faucibus.orci.luctus@dapibusrutrum.ca,(901) 207-9872,Ac Metus Institute,06/17/2013
|
||||
Fiona,Goff,tincidunt@enim.net,(265) 255-7749,Odio Phasellus Corp.,12/03/2012
|
||||
Kameko,Diaz,ac@turpisNulla.edu,(731) 354-4848,Montes Nascetur Corporation,08/16/2013
|
||||
Craig,Valentine,tristique@urnaVivamus.net,(437) 229-8198,Etiam Gravida Molestie Consulting,05/06/2014
|
||||
Samson,Cunningham,semper.pretium@auctor.edu,(335) 666-7758,Nec Ante Associates,07/02/2013
|
||||
Yoko,Rogers,nunc@Vivamus.net,(893) 405-6889,Fermentum Vel Mauris Corp.,03/29/2014
|
||||
Walter,Burnett,nisi.Mauris.nulla@felis.co.uk,(336) 411-9222,Suscipit Est Institute,06/26/2012
|
||||
Gisela,Nash,euismod@lectusrutrum.ca,(917) 249-0166,Non Magna LLP,11/23/2012
|
||||
Wanda,Pierce,Nulla@dolorsit.com,(480) 872-3389,Cum Sociis Natoque Limited,11/02/2013
|
||||
Jane,Dixon,eu.odio@Infaucibus.com,(112) 139-8563,Id Ante Dictum LLC,03/14/2014
|
||||
Octavius,Shannon,iaculis.aliquet@ante.ca,(541) 652-3295,Libero Est Institute,05/28/2014
|
||||
Rigel,Hunt,metus.Aenean.sed@inhendrerit.org,(792) 358-7505,Enim PC,09/05/2013
|
||||
Rachel,Gray,erat.in.consectetuer@Fuscealiquetmagna.org,(165) 973-1366,Suscipit Nonummy Fusce LLC,05/08/2013
|
||||
Madeline,Bradley,dignissim.Maecenas@egetmassaSuspendisse.co.uk,(436) 223-3135,Posuere PC,01/24/2014
|
||||
Emma,Conner,dictum@magnaDuisdignissim.com,(304) 429-2622,Nulla Incorporated,11/05/2013
|
||||
Halee,Mclean,amet.faucibus@Phasellus.net,(669) 364-0148,Ligula Consulting,03/05/2014
|
||||
Conan,Williams,massa@felisNulla.net,(999) 649-4433,Velit Eu Limited,05/15/2014
|
||||
Martena,Fowler,mi.lacinia@maurisa.ca,(405) 661-1762,Blandit Nam Institute,02/27/2013
|
||||
Robin,Buckley,cursus.Nunc.mauris@nislQuisque.net,(376) 771-9862,Sed Corp.,10/30/2012
|
||||
Isadora,Adams,arcu.Vestibulum@urna.co.uk,(138) 774-6058,Blandit Viverra Donec Institute,08/07/2012
|
||||
Bernard,Price,ultrices@Praesent.ca,(368) 882-6146,Egestas Blandit LLP,11/03/2013
|
|
Loading…
Reference in New Issue
Block a user