Question

我应该从.txt文件中获取某些信息并输出它。这是我需要的信息：

人口最多的州
人口最少的州
平均州人口
德克萨斯州人口

数据看起来像这样：

Alabama
AL
4802982
Alaska
AK
721523
Arizona
AZ
6412700
Arkansas
AR
2926229
California
CA
37341989

这是我的代码，它并没有真正做我需要做的任何事情：

def main():
    """Print the name, abbreviation and population of every state.

    Reads ``StateCensus2010.txt`` from the current directory. The file is
    expected to hold three lines per state, in this order: state name,
    state abbreviation, population.

    Raises:
        ValueError: if a population line cannot be converted to ``int``.
    """
    # The with-statement closes the file even when int() raises, which the
    # explicit close() call at the end of the loop could not guarantee.
    with open('StateCensus2010.txt', 'r') as census_file:
        # Read the first state name.
        state_name = census_file.readline()

        # readline() returns '' at end of file. Stopping on a blank line as
        # well keeps a trailing empty line from reaching int('').
        while state_name.strip() != '':
            state_abv = census_file.readline()
            population = int(census_file.readline())

            state_name = state_name.rstrip('\n')
            state_abv = state_abv.rstrip('\n')

            print('State Name: ', state_name)
            print('State Abv.: ', state_abv)
            print('Population: ', population)
            print()

            state_name = census_file.readline()


if __name__ == '__main__':
    main()

我目前所做的只是读取州名、缩写，并把人口转换为 int。这段代码还没有完成我需要的任何功能，而我也不确定该如何实现作业要求的内容。非常感谢任何提示！过去几个小时我一直在尝试各种办法，但都没有成功。

更新

这是我更新的代码，但是我收到了以下错误：

Traceback (most recent call last):
  File "main.py", line 13, in <module>
    if population > max_population:
TypeError: unorderable types: str() > int()

代码：

with open('StateCensus2010.txt', 'r') as census_file:  # file is closed when the with-block exits
    while True:  # NOTE(review): the only break is in the IOError handler below
        try:
            state_name = census_file.readline()
            state_abv = census_file.readline()
            population = int(census_file.readline())  # readline() returns '' at EOF; int('') raises ValueError
        except IOError:  # NOTE(review): a ValueError from int() is not caught by this clause
            break

        # data processing: search for the largest population
        max_population = 0  # reset on every pass of the while loop
        for population in census_file:  # rebinds population to each remaining line, as a str
          if population > max_population:  # str compared with int: the TypeError quoted in the traceback
            max_population = population

        print(max_population)

Answer 1

由于数据的顺序是固定的：州名、州缩写、人口，所以你只需要把这些行读取一次，每三行作为一组，就能得到全部三项信息。以下是示例代码。

# Summarise StateCensus2010.txt: print every state record, then the average
# population, the least and most populated states, and the Texas population.
average = 0.0
total = 0.0  # kept as a float so the printed average keeps its decimal form
state_min = float('inf')  # any real population is smaller than this
state_max = 0
statename_min = ''
statename_max = ''
texas_population = 0
with open('StateCensus2010.txt', 'r') as file:
    # Strip every line and drop blank ones. split('\n') left an empty trailing
    # entry when the file ended with a newline, which made the state count
    # fractional and divided the average by the wrong number.
    data = [line.strip() for line in file if line.strip()]

# Each state occupies three consecutive lines:
# state name, state abbreviation, population.
state_total = len(data) // 3

# count is the index of the first line of each state's record; stepping by 3
# jumps from one state to the next.
for count in range(0, state_total * 3, 3):
    statename = data[count]
    state_abv = data[count + 1]
    population = int(data[count + 2])

    print('Statename : ', statename)
    print('State Abv : ', state_abv)
    print('Population: ', population)
    print()

    # sum all states population
    total += population

    if population > state_max:
        state_max = population
        statename_max = statename

    if population < state_min:
        state_min = population
        statename_min = statename

    if statename == 'Texas':
        texas_population = population

# Divide the total population by the number of states; an empty file keeps
# the 0.0 default instead of dividing by zero.
if state_total:
    average = total / state_total
print(str(average))

print('Lowest population state :', statename_min)
print('Highest population state :', statename_max)
print('Texas population :', texas_population)

Answer 2

使用pandas这个问题非常简单。

代码：

import pandas as pd

# Build one record per state. `data` is a line iterator (an open file or a
# StringIO): the for-loop consumes the state-name line and the two next()
# calls consume the abbreviation and population lines that follow it.
states = []
for line in data:
    states.append(
        dict(state=line.strip(),
             abbrev=next(data).strip(),
             pop=int(next(data)),
             )
    )

df = pd.DataFrame(states)
print(df)

# .loc is the label-based indexer; the older .ix indexer was removed in
# pandas 1.0. idxmax()/idxmin() return the row label of the extreme value.
print('\nmax population:\n', df.loc[df['pop'].idxmax()])
print('\nmin population:\n', df.loc[df['pop'].idxmin()])
print('\navg population:\n', df['pop'].mean())
print('\nAZ population:\n', df[df.abbrev == 'AZ'])

测试数据：

from io import StringIO
# In-memory stand-in for the census file: the literal is cut into lines, the
# indentation of each line is stripped, and the lines are re-joined so the
# stream reads like the real three-lines-per-state file.
data = StringIO(
    u'\n'.join(
        entry.strip()
        for entry in """
    Alabama
    AL
    4802982
    Alaska
    AK
    721523
    Arizona
    AZ
    6412700
    Arkansas
    AR
    2926229
    California
    CA
    37341989
""".splitlines()[1:]  # [1:] drops the empty entry before the first newline
    )
)

结果：

  abbrev       pop       state
0     AL   4802982     Alabama
1     AK    721523      Alaska
2     AZ   6412700     Arizona
3     AR   2926229    Arkansas
4     CA  37341989  California

max population:
abbrev            CA
pop         37341989
state     California
Name: 4, dtype: object

min population:
abbrev        AK
pop       721523
state     Alaska
Name: 1, dtype: object

avg population:
10441084.6

AZ population:
  abbrev      pop    state
2     AZ  6412700  Arizona

Answer 3

请试试这段代码。之前的代码不兼容 Python 3，只支持 Python 2.7。

    def extract_data(state):
        """Print summary statistics for the parsed census data.

        Args:
            state: dict mapping a state abbreviation to a dict that holds
                the keys ``'population'`` and ``'state_name'``.

        Prints, one per line: the abbreviation and population of the most
        populated state, the same for the least populated state, the number
        of states, the average population truncated to an int, and the
        population stored under ``'TX'``.

        Raises:
            ValueError: if ``state`` is empty.
        """
        if not state:
            raise ValueError('no state data to summarise')

        total_population = 0
        # None marks "nothing seen yet", so the first state always becomes
        # both the current highest and the current lowest.
        highest = lowest = None
        highest_state = lowest_state = None

        # Everything below must run once per state; with the accumulation
        # outside the loop only the last state was ever counted.
        for abbrev, stats in state.items():
            population = stats.get('population')
            total_population += population

            if highest is None or population > highest:
                highest = population
                highest_state = abbrev

            if lowest is None or population < lowest:
                lowest = population
                lowest_state = abbrev

        print(highest_state, highest)
        print(lowest_state, lowest)
        print(len(state))
        print(int(total_population/len(state)))
        print(state.get('TX').get('population'))

def main():
    """Parse ``states.txt`` into a dict keyed by state abbreviation.

    The file is expected to hold three lines per state, in this order:
    state name, state abbreviation, population.

    Returns:
        dict mapping each abbreviation to
        ``{'population': int, 'state_name': str}``. If an abbreviation
        occurs more than once, the last record wins.

    Raises:
        ValueError: if a population line cannot be converted to ``int``.
    """
    state = {}

    # The with-statement closes the file even when int() raises.
    with open('states.txt', 'r') as census_file:
        # Read the first state name.
        state_name = census_file.readline()

        # readline() returns '' at end of file. Stopping on a blank line as
        # well keeps a trailing empty line from reaching int('').
        while state_name.strip() != '':
            state_abv = census_file.readline().rstrip('\n')
            population = int(census_file.readline())

            # Plain assignment covers both the new-key and the existing-key
            # case, since every record stores the same two keys.
            state[state_abv] = {
                'population': population,
                'state_name': state_name.rstrip('\n'),
            }

            state_name = census_file.readline()

    return state


if __name__ == '__main__':
    state = main()
    extract_data(state)

Answer 4

来自解释器的另一个pandas解决方案：

>>> import pandas as pd
>>>
>>> records = [line.strip() for line in open('./your.txt', 'r')]
>>>
>>> df = pd.DataFrame([records[i:i+3] for i in range(0, len(records), 3)], 
...     columns=['State', 'Code', 'Pop']).dropna()
>>>
>>> df['Pop'] = df['Pop'].astype(int)
>>>
>>> df
        State Code       Pop
0     Alabama   AL   4802982
1      Alaska   AK    721523
2     Arizona   AZ   6412700
3    Arkansas   AR   2926229
4  California   CA  37341989
>>>
>>> df.ix[df['Pop'].idxmax()]
State    California
Code             CA
Pop        37341989
Name: 4, dtype: object
>>>
>>> df.ix[df['Pop'].idxmin()]
State    Alaska
Code         AK
Pop      721523
Name: 1, dtype: object
>>>
>>> df['Pop'].mean()
10441084.6
>>>
>>> df.ix[df['Code'] == 'AZ' ]
     State Code      Pop
2  Arizona   AZ  6412700

从.txt文件中读取统计信息并输出它们

4 个答案: