Question

我在csv文件中有nmea句子。

示例：

$GPGSA,A,3,03,22,06,19,11,14,32,01,28,18,,,1.8,0.8,1.6*3F

我如何同时用逗号和*来分隔？

with f as csvfile:
    readCSV = csv.reader(csvfile,delimiter=',')

我可以把*替换成,吗？

我试图使用readCSV = str.replace("*", ',')但没有成功。

应要求，附上完整代码：

import os
import csv
from csv import *
import matplotlib
from numpy import *
from matplotlib import *
import matplotlib.pyplot as plt
from matplotlib.pylab import *
import numpy as np
#import nmea_defs
#from nmea_defs import *


#to export to excel
import xlsxwriter
from xlsxwriter.workbook import Workbook

#to get the csv converter functions
import os
import subprocess
import glob

#to get the datetime functions
import datetime
from datetime import datetime
from pytz import timezone
import time
import calendar

# Directories the script reads NMEA logs from and writes its results to.
path_in, path_out = 'C:/Python34/gps_txts/', 'C:/Python34/output_files/'

# Optionally list everything os.walk finds below the input directory.
q_show_content = input('Print list of files type y:')
if q_show_content != 'y':
    print('ok')
else:
    for walk_entry in os.walk(path_in):
        # each entry is the (root, dirs, files) triple produced by os.walk
        print(*walk_entry)

data = []  # module-level store; read_csv_file appends every parsed row to it


def read_csv_file(filename):
    """Read an NMEA log and append its rows to the module-level ``data``.

    Each line is split on commas by ``csv.reader``.  If the last field of a
    row carries a ``*checksum`` suffix (e.g. ``1.6*3F``) it is additionally
    split on ``*`` so the value and the checksum become separate fields.
    Rows without a ``*`` are stored exactly as ``csv.reader`` produced them.

    Args:
        filename: Path of the file to read.

    Returns:
        The module-level ``data`` list (the same object on every call, so
        repeated calls accumulate rows).
    """
    # newline='' is the mode the csv module documents for reader input; the
    # with-block guarantees the handle is closed even if parsing fails.
    with open(filename, newline='') as handle:
        for row in csv.reader(handle):
            if row and '*' in row[-1]:
                # '1.6*3F' -> '1.6', '3F'
                row[-1:] = row[-1].split('*')
            data.append(row)
    return data

#request of what file to look at
print ("- - - - - - - - - - - - -")
data_file = input('Which file do you want to look at?')

# None of the imports at the top of the file binds `re` explicitly, so it is
# imported here before the split below uses it.
import re

# `f` and `csvfile` stay bound at module level on purpose: the workbook code
# further down calls .close() on them.
f = open(path_in + data_file)
read_it = read_csv_file(path_in + data_file)


with f as csvfile:
    for line in csvfile:
        # split on both delimiters; the line ending is dropped first so it
        # does not stick to the last field
        row = re.split('[,*]', line.rstrip('\r\n'))



#print number of rows
print ("- - - - - - - - - - - - -")
rows = len(read_it)
print (data_file, " has "+ str(rows) + " rows of data")


#Counts the number of times a GPS command is observed
def list_gps_commands(data):
    """Count how often each GPS command (first field of a row) occurs.

    Args:
        data: Iterable of rows; each row is a sequence whose first element
            is the command name.  Empty rows (e.g. from blank lines in the
            input) are ignored instead of raising ``IndexError``.

    Returns:
        A plain ``dict`` mapping command name to its number of occurrences,
        in order of first appearance.
    """
    gps_cmds = dict()
    for row in data:
        if not row:
            continue
        command = row[0]
        gps_cmds[command] = gps_cmds.get(command, 0) + 1

    return gps_cmds


class GPS:
    """Extract selected fields from NMEA 0183 sentences.

    Every ``process_*`` method walks ``data`` (a list of rows, each row a
    list of string fields), keeps the rows whose first field matches its
    sentence id, stores the parsed values as lists on the instance and
    returns them as a tuple of arrays.  Empty rows are skipped.
    See also: http://www.gpsinformation.org/dale/nmea.htm.
    """

    @staticmethod
    def _to_degrees(field, degree_digits, negate):
        """Turn a ``(d)ddmm.mmmm`` field into decimal degrees.

        The first ``degree_digits`` characters are whole degrees, the rest
        are minutes; the result is negated when ``negate`` is true.
        """
        degrees = float(field[:degree_digits]) + float(field[degree_digits:])/60.0
        return -degrees if negate else degrees

    @staticmethod
    def _banner(message):
        """Print ``message`` framed by the script's separator lines."""
        print ("- - - - - - - - - - - - -")
        print(message)
        print ("- - - - - - - - - - - - -")

    def process_gprmc_data(self, data):
        """Parse the ``$GPRMC`` rows of ``data``.

        Returns a tuple of arrays: latitude, longitude, velocity
        (field 7 * 1852 / 1000), timestamp (``datetime`` built from the
        date and time fields), speed (field 7 * 1.15078) and course.
        """
        # Kept as a module-level global because the original published it
        # that way; code outside this class may read it.
        global NMI
        NMI = 1852.0
        self.latitude  = []
        self.longitude = []
        self.velocity  = []
        self.timestamp = []
        self.speed = []
        self.course = []

        self._banner('processing gprmc data')
        for row in data:
            if not row or row[0] != '$GPRMC':
                continue
            self.latitude.append(self._to_degrees(row[3], 2, row[4] == "S"))
            self.longitude.append(self._to_degrees(row[5], 3, row[6] == "W"))
            knots = float(row[7])
            self.velocity.append(knots*NMI/1000.0)
            gpstime = row[1][0:6]                     # hhmmss
            gdate = row[9]                            # ddmmyy
            gpsdate = gdate[4:6]+gdate[2:4]+gdate[0:2]  # yymmdd
            self.timestamp.append(datetime.strptime(gpsdate + gpstime, "%y%m%d%H%M%S"))
            self.speed.append(knots*1.15078)
            self.course.append(float(row[8]))

        return (array(self.latitude), array(self.longitude), array(self.velocity), array(self.timestamp), array(self.speed), array(self.course))

    def process_gpgga_data(self, data):
        """Parse the ``$GPGGA`` rows of ``data``.

        Returns a tuple of arrays: latitude, longitude, altitude (field 9),
        timestamp (time of day only, parsed with ``%H%M%S``) and number of
        satellites (field 7).
        """
        self.latitude  = []
        self.longitude = []
        self.altitude  = []
        self.timestamp = []
        self.num_sats  = []

        self._banner('processing gpgga data')
        for row in data:
            if not row or row[0] != '$GPGGA':
                continue
            self.latitude.append(self._to_degrees(row[2], 2, row[3] == "S"))
            self.longitude.append(self._to_degrees(row[4], 3, row[5] == "W"))
            gpstime = row[1][0:6]                     # hhmmss
            self.timestamp.append(datetime.strptime(gpstime, "%H%M%S"))
            self.altitude.append(float(row[9]))
            self.num_sats.append(float(row[7]))

        return (array(self.latitude), array(self.longitude), array(self.altitude), array(self.timestamp), array(self.num_sats))

    def process_gpgll_data(self, data):
        """Parse the ``$GPGLL`` rows of ``data``.

        Returns a tuple of arrays: latitude, longitude and timestamp (time
        of day only, parsed with ``%H%M%S`` from field 5).
        """
        self.latitude  = []
        self.longitude = []
        self.timestamp = []

        self._banner('processing gpgll data')
        for row in data:
            if not row or row[0] != '$GPGLL':
                continue
            self.latitude.append(self._to_degrees(row[1], 2, row[2] == "S"))
            self.longitude.append(self._to_degrees(row[3], 3, row[4] == "W"))
            gpstime = row[5][0:6]                     # hhmmss
            self.timestamp.append(datetime.strptime(gpstime, "%H%M%S"))

        return (array(self.latitude), array(self.longitude), array(self.timestamp))

    def process_gpgsa_data(self,data):
        """Parse the ``$GPGSA`` rows of ``data``.

        Returns a tuple of arrays: fix (field 2), satellites (the string
        form of fields 3-14), PDOP, HDOP and VDOP (fields 15-17).
        """
        self.fix = []
        self.sats = []
        self.pdop = []
        self.hdop = []
        self.vdop = []

        self._banner('processing gpgsa data')
        for row in data:
            if not row or row[0] != '$GPGSA':
                continue
            self.fix.append(float(row[2]))
            self.sats.append(str(row[3:15]))
            self.pdop.append(float(row[15]))
            self.hdop.append(float(row[16]))
            # A comma-only split leaves the checksum attached to the last
            # value ('1.6*3F'); drop everything from the '*' on.
            self.vdop.append(float(row[17].split('*')[0]))

        return (array(self.fix), array(self.sats), array(self.pdop), array(self.hdop), array(self.vdop))



class CreateWorkbook:
    """Export the values parsed from one NMEA sentence type to an .xlsx file.

    ``openworkbook`` creates the workbook and publishes it, the output name
    and the cell formats as module-level globals.  One of the
    ``closeworkbook_*`` methods then parses ``data`` with ``GPS``, fills the
    worksheet and closes the workbook.
    """

    def openworkbook(self, data):
        """Ask for an output name and create the workbook and its formats.

        ``data`` is not used here.  The file name is built from the
        module-level ``path_out`` and ``command_type``.
        """
        global output_filename, workbook, bold, date_format, time_format
        output_filename = input('output filename:')
        workbook = xlsxwriter.Workbook(path_out + output_filename + '_' + command_type +'.xlsx')
        self.worksheet = workbook.add_worksheet()
        #formatting definitions
        bold = workbook.add_format({'bold': True})
        date_format = workbook.add_format({'num_format': "m/d/yyyy hh:mm:ss"})
        time_format = workbook.add_format({'num_format': "hh:mm:ss"})

    def _write_and_close(self, headers, columns, first_column_format=None):
        """Write a bold header row and one column per array, then close.

        Each array in ``columns`` is written exactly once, starting in the
        row directly below the header.  ``first_column_format`` (if given)
        is applied to the first column only.
        """
        for col, title in enumerate(headers):
            self.worksheet.write(0, col, title, bold)
        #dropdown menu created for filtering
        self.worksheet.autofilter(0, 0, 0, len(headers) - 1)

        for col, values in enumerate(columns):
            cell_format = first_column_format if col == 0 else None
            self.worksheet.write_column(1, col, values, cell_format)

        workbook.close()
        # `f` is the module-level input file handle; the original closed it
        # here as well, and closing an already closed file does nothing.
        f.close()
        print('XLSX file named ' + output_filename + '_' + command_type +' was created')

    def closeworkbook_gprmc(self, data):
        """Fill the sheet with the ``$GPRMC`` values of ``data`` and close."""
        print('closeworkbook')
        #pull data from process_gprmc_data
        gps = GPS()
        (lati, long, v, t_stamp, mph, crse) = gps.process_gprmc_data(data)
        self._write_and_close(
            ('TimeStamp', 'Latitude', 'Longitude', 'Velocity', 'Speed-MPH', 'Course'),
            (t_stamp, lati, long, v, mph, crse),
            date_format,
        )

    def closeworkbook_gpgga(self, data):
        """Fill the sheet with the ``$GPGGA`` values of ``data`` and close."""
        print('closeworkbook')
        #pull data from process_gpgga_data
        gps = GPS()
        (lati, long, alt, t_stamp,numsats) = gps.process_gpgga_data(data)
        self._write_and_close(
            ('TimeStamp', 'Latitude', 'Longitude', 'Altitude', 'Num_sats'),
            (t_stamp, lati, long, alt, numsats),
            time_format,
        )

    def closeworkbook_gpgll(self, data):
        """Fill the sheet with the ``$GPGLL`` values of ``data`` and close."""
        print('closeworkbook')
        #pull data from process_gpgll_data
        gps = GPS()
        (lati, long, t_stamp) = gps.process_gpgll_data(data)
        self._write_and_close(
            ('TimeStamp', 'Latitude', 'Longitude'),
            (t_stamp, lati, long),
            time_format,
        )

    def closeworkbook_gpgsa(self, data):
        """Fill the sheet with the ``$GPGSA`` values of ``data`` and close."""
        print('closeworkbook')
        #pull data from process_gpgsa_data
        gps = GPS()
        (fix, sats, pdop, hdop, vdop) = gps.process_gpgsa_data(data)
        self._write_and_close(
            ('Fix', 'Satellites', 'Position DOP', 'Horizon DOP', 'Vertical DOP'),
            (fix, sats, pdop, hdop, vdop),
        )



class ConvertFile:
    """Convert the exported workbook to .csv and the .csv to .kml.

    Both methods build their file names from the module-level
    ``output_filename``, ``command_type`` and ``path_out``; ``data`` is
    accepted but not used.
    """

    def convert2csv(self, data):
        """Run the ``in2csv`` utility on the workbook and save its output.

        Changes the working directory to ``path_out`` and writes one .csv
        next to every matching .xlsx file.
        """
        # set path to folder containing xlsx files
        os.chdir(path_out)

        # find the file with extension .xlsx
        xlsx = glob.glob(output_filename + '_' + command_type + '.xlsx')

        # loop through each file, calling the in2csv utility from subprocess
        for xl in xlsx:
            csv_name = xl.replace('.xlsx','.csv')
            # An argument list (instead of one command string) keeps file
            # names containing spaces in a single argument.
            command = ['c:/python34/scripts/in2csv', os.path.join(path_out, xl)]
            with open(csv_name,'w') as out:
                proc = subprocess.Popen(command,stdout=out)
                proc.wait()
        print('CSV file named ' + output_filename + '_' + command_type + ' was created')

    def convert2kml(self, data):
        """Write one KML placemark per data row of the exported .csv.

        Column 0 is used as the timestamp and columns 2 and 1 as the
        coordinate pair.  The .kml is created relative to the current
        working directory (``convert2csv`` sets that to ``path_out``).
        """
        from xml.sax.saxutils import escape

        base_name = output_filename + '_' + command_type
        with open(path_out + base_name + '.csv', newline='') as csvfile2, \
                open(base_name + '.kml','w') as g:
            data2 = csv.reader(csvfile2,delimiter=',')
            # skip the header row; the default avoids StopIteration on an
            # empty file
            next(data2, None)

            g.write("<?xml version='1.0' encoding='UTF-8'?>\n")
            g.write("<kml xmlns='http://earth.google.com/kml/2.1'>\n")
            g.write("<Document>\n")
            g.write("   <name>" + escape(base_name + '.kml') +"</name>\n")
            for row in data2:
                if len(row) < 3:
                    # blank or truncated line: no coordinates to write
                    continue
                g.write("   <Placemark>\n")
                g.write("         <TimeStamp><when>" + escape(str(row[0])) + "</when></TimeStamp>\n")
                g.write("             <Point>\n")
                g.write("              <coordinates>" + escape(str(row[2])) + "," + escape(str(row[1])) + "</coordinates>\n")
                g.write("             </Point>\n")
                g.write("   </Placemark>\n")
            g.write("</Document>\n")
            g.write("</kml>\n")
        print('and ' + output_filename + '_' + command_type +'.kml was created,too!')
        print ("- - - - - - - - - - - - -")

class PrintGPS(GPS):
    """``GPS`` variant that prints the parsed values instead of returning them.

    Each override runs the matching ``GPS`` parser through ``super()`` and
    prints a subset of the resulting arrays.  The methods return ``None``.
    """

    def _show(self, fields):
        """Print every ``(label, value)`` pair followed by a separator line.

        Afterwards prints a notice when the module-level ``rows`` count
        exceeds 200.
        """
        print('got definitions in')
        print ("- - - - - - - - - - - - -")
        for label, value in fields:
            print(label, value)
            print ("- - - - - - - - - - - - -")

        if rows > 200:
            print('Big file please wait...thinking')

    def process_gprmc_data(self, data):
        """Parse the ``$GPRMC`` rows and print lat, long, velocity and date."""
        print('PrintGPS')
        (lati, long, v, t_stamp, mph, crse) = super().process_gprmc_data(data)
        self._show([('lat:', lati), ('long:', long), ('v:', v), ('date:', t_stamp)])

    def process_gpgga_data(self, data):
        """Parse the ``$GPGGA`` rows and print lat, long, alt, date, numsats."""
        (lati, long, alt, t_stamp,numsats) = super().process_gpgga_data(data)
        self._show([('lat:', lati), ('long:', long), ('alt:', alt),
                    ('date:', t_stamp), ('numsats:', numsats)])

    def process_gpgll_data(self, data):
        """Parse the ``$GPGLL`` rows and print lat, long and time."""
        # The GLL parser returns three arrays; calling the GGA parser here
        # (as the code did before) cannot be unpacked into three names.
        (lati, long, t_stamp) = super().process_gpgll_data(data)
        self._show([('lat:', lati), ('long:', long), ('time:', t_stamp)])

    def process_gpgsa_data(self, data):
        """Parse the ``$GPGSA`` rows and print fix, sats and the DOP values."""
        print('PrintGPS')
        (fix, sats, pdop, hdop, vdop) = super().process_gpgsa_data(data)
        self._show([('fix:', fix), ('sats:', sats), ('pdop:', pdop),
                    ('hdop:', hdop), ('vdop:', vdop)])



#processing piece
# Interactive loop: show the sentence counts, ask for a sentence type, run the
# matching print/export pipeline, then ask whether to go round again.
keep_asking = True
while keep_asking:
    print ("- - - - - - - - - - - - -")
    print(list_gps_commands(read_it))
    print ("- - - - - - - - - - - - -")
    command_type = input("What message type do you want to look at?")

    # Exact spellings accepted for each sentence type.
    is_gga = command_type in ('$GPGGA', 'GPGGA', 'GGA','$gpgga', 'gpgga', 'gga')
    is_rmc = command_type in ('$GPRMC', 'GPRMC', 'RMC', '$gprmc', 'gprmc', 'rmc')
    is_gll = command_type in ('$GPGLL', 'GPGLL', 'GLL', '$gpgll', 'gpgll', 'gll')
    is_gsa = command_type in ('$GPGSA', 'GPGSA', 'GSA', '$gpgsa', 'gpgsa', 'gsa')

    if is_gga or is_rmc or is_gll or is_gsa:
        #define the classes
        gps = GPS()
        createworkbook = CreateWorkbook()
        convertfile = ConvertFile()
        print_gps = PrintGPS()
        #do the deeds
        createworkbook.openworkbook(data)
        if is_gga:
            print_gps.process_gpgga_data(data)
            createworkbook.closeworkbook_gpgga(data)
        elif is_rmc:
            print_gps.process_gprmc_data(data)
            createworkbook.closeworkbook_gprmc(data)
        elif is_gll:
            print_gps.process_gpgll_data(data)
            createworkbook.closeworkbook_gpgll(data)
        else:
            print_gps.process_gpgsa_data(data)
            createworkbook.closeworkbook_gpgsa(data)
        convertfile.convert2csv(data)
        # GSA rows are exported to xlsx/csv only; no KML is produced for them
        if not is_gsa:
            convertfile.convert2kml(data)
    elif command_type in ('$GPVTG', 'GPVTG', 'VTG', '$gpvtg', 'gpvtg', 'vtg'):
        print('Better check $GPRMC')

    list_gps_commands(data)
    wannalook = input('Want to look at another message or no?')
    keep_asking = wannalook.startswith('y')
    if not keep_asking:
        print('********************')
        print('**mischief managed**')
        print('********************')

Answer 1

csv不支持多个分隔符（从技术上讲，这也不是CSV文件）。从您的文件格式来看，我认为您可以使用re.split()来解析它，不会有问题：

with open(...) as csvfile:
    for line in csvfile:
        fields = re.split('[,*]', line)

仅当您的文件包含带引号的字符串时，才会遇到问题。

Answer 2

考虑到您正在进行大量的数值计算，您可能会发现pandas很有用，使用两个分隔符读取csv文件非常简单：

import pandas as pd

# The separator is a regular expression (comma OR literal '*').  The raw
# string keeps `\*` a regex escape rather than an invalid string escape, and
# regex separators are handled by the Python parsing engine, so it is
# selected explicitly.
# NOTE(review): pass header=None if the file has no header row - confirm
# against the actual input.
df = pd.read_csv(infile, sep=r",|\*", engine="python")

我怎么能有两个分隔符？

2 个答案: