按子集标准化数据

时间:2014-07-16 11:14:55

标签: r loops normalization subset

我试图将数据标准化(使用来自SO的自定义函数),在我的情况下NFL按季节传递统计数据。我的原始数据框架在2004年至2013年期间一起,但由于我需要按季节对其进行标准化,我能想到的唯一方法是按季节将其分解,将其标准化,然后将其合并。

虽然我的代码工作似乎非常效率低,所以任何帮助(对于这个问题和你看到的任何其他内容)都会受到赞赏。

代码:

# custom functions
normalize <- function(x) { 
  return((x - min(x)) / (max(x) - min(x)))
}

trim <- function (x) gsub("^\\s+|\\s+$", "", x)

first_season <- 2004
last_season <- 2013
num_seasons <- as.numeric(last_season - first_season + 1)

# seasons <- seq(2004, 2013, by=1)
seasons <- seq(first_season, last_season, by=1)
rm(first_season, last_season)

library(XML)
library(plyr)
library(stringr)

# Passing 

for (i in 1:num_seasons) {
  url <- paste("http://www.pro-football-reference.com/years/", seasons[i],"/passing.htm", sep = "")
  df <- readHTMLTable(url,which=1)
  df$season = seasons[i]
  df <- df[!names(df) %in% c("QBrec") ] 
  if(df$season >= 2008) df <- df[!names(df) %in% c("QBR") ] # Removes QBR 2008+
  assign (paste("passing_", seasons[i], sep = "") , df)
  rm(df)
  print(seasons[i])
}

names(passing_2005) <- names(passing_2004)
names(passing_2006) <- names(passing_2004)
names(passing_2007) <- names(passing_2004)
names(passing_2008) <- names(passing_2004)
names(passing_2009) <- names(passing_2004)
names(passing_2010) <- names(passing_2004)
names(passing_2011) <- names(passing_2004)
names(passing_2012) <- names(passing_2004)
names(passing_2013) <- names(passing_2004)

# Combine all Passing seasons
passing <- rbind(passing_2004, passing_2005, passing_2006, passing_2007, passing_2008, 
                 passing_2009, passing_2010, passing_2011, passing_2012, passing_2013)

# Remove all individual Passing seasons
rm(passing_2004, passing_2005, passing_2006, passing_2007, passing_2008, 
   passing_2009, passing_2010, passing_2011, passing_2012, passing_2013)

names(passing) <- c("rank_pfr", "nameinfo", "team", "age", "games", "games_started",
                    #"qb_record", 
                    "completions", "attempts", "comp_pct", "yards_passing",
                    "td_passing", "td_pct", "interceptions", "int_pct", "long_passing",
                    "yards_pass_att", "yards_pass_att_avg", "yards_pass_comp","yards_pass_game",
                    "pass_rate", "sacks", "sacks_pass", "yards_net_pass_att", "yards_net_pass_att_avg",
                    "sacks_pct", "comebacks", "game_win_drives", "season")

passing <- passing[which(passing$rank_pfr!='Rk'), ]

# Convert from Factor to Numeric
passing$rank_pfr <- as.numeric(as.character(passing$rank_pfr))
passing$age <- as.numeric(as.character(passing$age))
passing$games <- as.numeric(as.character(passing$games))
passing$games_started <- as.numeric(as.character(passing$games_started))
passing$completions <- as.numeric(as.character(passing$completions))
passing$attempts <- as.numeric(as.character(passing$attempts))
passing$comp_pct <- as.numeric(as.character(passing$comp_pct))
passing$yards_passing <- as.numeric(as.character(passing$yards_passing))
passing$td_passing <- as.numeric(as.character(passing$td_passing))
passing$td_pct <- as.numeric(as.character(passing$td_pct))
passing$interceptions <- as.numeric(as.character(passing$interceptions))
passing$int_pct <- as.numeric(as.character(passing$int_pct))
passing$long_passing <- as.numeric(as.character(passing$long_passing))
passing$yards_pass_att <- as.numeric(as.character(passing$yards_pass_att))
passing$yards_pass_att_avg <- as.numeric(as.character(passing$yards_pass_att_avg))
passing$yards_pass_comp <- as.numeric(as.character(passing$yards_pass_comp))
passing$yards_pass_game <- as.numeric(as.character(passing$yards_pass_game))
passing$pass_rate <- as.numeric(as.character(passing$pass_rate))
passing$sacks <- as.numeric(as.character(passing$sacks))
passing$sacks_pass <- as.numeric(as.character(passing$sacks_pass))
passing$yards_net_pass_att <- as.numeric(as.character(passing$yards_net_pass_att))
passing$yards_net_pass_att_avg <- as.numeric(as.character(passing$yards_net_pass_att_avg))
passing$sacks_pct <- as.numeric(as.character(passing$sacks_pct))
passing$comebacks <- as.numeric(as.character(passing$comebacks))
passing$game_win_drives <- as.numeric(as.character(passing$game_win_drives))
passing$season <- as.numeric(as.character(passing$season))

passing[is.na(passing)] <- 0

# This is where I break it out by season to normalize, but I'd rather 
# just keep it in "passing" as opposed to "passing_20XX"

for (i in 1:num_seasons) {
  assign (paste("passing_", seasons[i], sep = "") , passing[passing$season == seasons[i], ])
  print(seasons[i])
}

rm(passing)

# 
passing_2004$age <- normalize(passing_2004$age)
passing_2004$games <- normalize(passing_2004$games)
passing_2004$games_started <- normalize(passing_2004$games_started)
passing_2004$completions <- normalize(passing_2004$completions)
passing_2004$attempts <- normalize(passing_2004$attempts)
passing_2004$comp_pct <- normalize(passing_2004$comp_pct)
passing_2004$yards_passing <- normalize(passing_2004$yards_passing)
passing_2004$td_passing <- normalize(passing_2004$td_passing)
passing_2004$td_pct <- normalize(passing_2004$td_pct)
passing_2004$interceptions <- normalize(passing_2004$interceptions)
passing_2004$int_pct <- normalize(passing_2004$int_pct)
passing_2004$long_passing <- normalize(passing_2004$long_passing)
passing_2004$yards_pass_att <- normalize(passing_2004$yards_pass_att)
passing_2004$yards_pass_att_avg <- normalize(passing_2004$yards_pass_att_avg)
passing_2004$yards_pass_comp <- normalize(passing_2004$yards_pass_comp)
passing_2004$yards_pass_game <- normalize(passing_2004$yards_pass_game)
passing_2004$pass_rate <- normalize(passing_2004$pass_rate)
passing_2004$sacks <- normalize(passing_2004$sacks)
passing_2004$sacks_pass <- normalize(passing_2004$sacks_pass)
passing_2004$yards_net_pass_att <- normalize(passing_2004$yards_net_pass_att)
passing_2004$yards_net_pass_att_avg <- normalize(passing_2004$yards_net_pass_att_avg)
passing_2004$sacks_pct <- normalize(passing_2004$sacks_pct)
passing_2004$comebacks <- normalize(passing_2004$comebacks)
passing_2004$game_win_drives <- normalize(passing_2004$game_win_drives)

passing_2005$age <- normalize(passing_2005$age)
passing_2005$games <- normalize(passing_2005$games)
passing_2005$games_started <- normalize(passing_2005$games_started)
passing_2005$completions <- normalize(passing_2005$completions)
passing_2005$attempts <- normalize(passing_2005$attempts)
passing_2005$comp_pct <- normalize(passing_2005$comp_pct)
passing_2005$yards_passing <- normalize(passing_2005$yards_passing)
passing_2005$td_passing <- normalize(passing_2005$td_passing)
passing_2005$td_pct <- normalize(passing_2005$td_pct)
passing_2005$interceptions <- normalize(passing_2005$interceptions)
passing_2005$int_pct <- normalize(passing_2005$int_pct)
passing_2005$long_passing <- normalize(passing_2005$long_passing)
passing_2005$yards_pass_att <- normalize(passing_2005$yards_pass_att)
passing_2005$yards_pass_att_avg <- normalize(passing_2005$yards_pass_att_avg)
passing_2005$yards_pass_comp <- normalize(passing_2005$yards_pass_comp)
passing_2005$yards_pass_game <- normalize(passing_2005$yards_pass_game)
passing_2005$pass_rate <- normalize(passing_2005$pass_rate)
passing_2005$sacks <- normalize(passing_2005$sacks)
passing_2005$sacks_pass <- normalize(passing_2005$sacks_pass)
passing_2005$yards_net_pass_att <- normalize(passing_2005$yards_net_pass_att)
passing_2005$yards_net_pass_att_avg <- normalize(passing_2005$yards_net_pass_att_avg)
passing_2005$sacks_pct <- normalize(passing_2005$sacks_pct)
passing_2005$comebacks <- normalize(passing_2005$comebacks)
passing_2005$game_win_drives <- normalize(passing_2005$game_win_drives)

passing_2006$age <- normalize(passing_2006$age)
passing_2006$games <- normalize(passing_2006$games)
passing_2006$games_started <- normalize(passing_2006$games_started)
passing_2006$completions <- normalize(passing_2006$completions)
passing_2006$attempts <- normalize(passing_2006$attempts)
passing_2006$comp_pct <- normalize(passing_2006$comp_pct)
passing_2006$yards_passing <- normalize(passing_2006$yards_passing)
passing_2006$td_passing <- normalize(passing_2006$td_passing)
passing_2006$td_pct <- normalize(passing_2006$td_pct)
passing_2006$interceptions <- normalize(passing_2006$interceptions)
passing_2006$int_pct <- normalize(passing_2006$int_pct)
passing_2006$long_passing <- normalize(passing_2006$long_passing)
passing_2006$yards_pass_att <- normalize(passing_2006$yards_pass_att)
passing_2006$yards_pass_att_avg <- normalize(passing_2006$yards_pass_att_avg)
passing_2006$yards_pass_comp <- normalize(passing_2006$yards_pass_comp)
passing_2006$yards_pass_game <- normalize(passing_2006$yards_pass_game)
passing_2006$pass_rate <- normalize(passing_2006$pass_rate)
passing_2006$sacks <- normalize(passing_2006$sacks)
passing_2006$sacks_pass <- normalize(passing_2006$sacks_pass)
passing_2006$yards_net_pass_att <- normalize(passing_2006$yards_net_pass_att)
passing_2006$yards_net_pass_att_avg <- normalize(passing_2006$yards_net_pass_att_avg)
passing_2006$sacks_pct <- normalize(passing_2006$sacks_pct)
passing_2006$comebacks <- normalize(passing_2006$comebacks)
passing_2006$game_win_drives <- normalize(passing_2006$game_win_drives)

passing_2007$age <- normalize(passing_2007$age)
passing_2007$games <- normalize(passing_2007$games)
passing_2007$games_started <- normalize(passing_2007$games_started)
passing_2007$completions <- normalize(passing_2007$completions)
passing_2007$attempts <- normalize(passing_2007$attempts)
passing_2007$comp_pct <- normalize(passing_2007$comp_pct)
passing_2007$yards_passing <- normalize(passing_2007$yards_passing)
passing_2007$td_passing <- normalize(passing_2007$td_passing)
passing_2007$td_pct <- normalize(passing_2007$td_pct)
passing_2007$interceptions <- normalize(passing_2007$interceptions)
passing_2007$int_pct <- normalize(passing_2007$int_pct)
passing_2007$long_passing <- normalize(passing_2007$long_passing)
passing_2007$yards_pass_att <- normalize(passing_2007$yards_pass_att)
passing_2007$yards_pass_att_avg <- normalize(passing_2007$yards_pass_att_avg)
passing_2007$yards_pass_comp <- normalize(passing_2007$yards_pass_comp)
passing_2007$yards_pass_game <- normalize(passing_2007$yards_pass_game)
passing_2007$pass_rate <- normalize(passing_2007$pass_rate)
passing_2007$sacks <- normalize(passing_2007$sacks)
passing_2007$sacks_pass <- normalize(passing_2007$sacks_pass)
passing_2007$yards_net_pass_att <- normalize(passing_2007$yards_net_pass_att)
passing_2007$yards_net_pass_att_avg <- normalize(passing_2007$yards_net_pass_att_avg)
passing_2007$sacks_pct <- normalize(passing_2007$sacks_pct)
passing_2007$comebacks <- normalize(passing_2007$comebacks)
passing_2007$game_win_drives <- normalize(passing_2007$game_win_drives)

passing_2008$age <- normalize(passing_2008$age)
passing_2008$games <- normalize(passing_2008$games)
passing_2008$games_started <- normalize(passing_2008$games_started)
passing_2008$completions <- normalize(passing_2008$completions)
passing_2008$attempts <- normalize(passing_2008$attempts)
passing_2008$comp_pct <- normalize(passing_2008$comp_pct)
passing_2008$yards_passing <- normalize(passing_2008$yards_passing)
passing_2008$td_passing <- normalize(passing_2008$td_passing)
passing_2008$td_pct <- normalize(passing_2008$td_pct)
passing_2008$interceptions <- normalize(passing_2008$interceptions)
passing_2008$int_pct <- normalize(passing_2008$int_pct)
passing_2008$long_passing <- normalize(passing_2008$long_passing)
passing_2008$yards_pass_att <- normalize(passing_2008$yards_pass_att)
passing_2008$yards_pass_att_avg <- normalize(passing_2008$yards_pass_att_avg)
passing_2008$yards_pass_comp <- normalize(passing_2008$yards_pass_comp)
passing_2008$yards_pass_game <- normalize(passing_2008$yards_pass_game)
passing_2008$pass_rate <- normalize(passing_2008$pass_rate)
passing_2008$sacks <- normalize(passing_2008$sacks)
passing_2008$sacks_pass <- normalize(passing_2008$sacks_pass)
passing_2008$yards_net_pass_att <- normalize(passing_2008$yards_net_pass_att)
passing_2008$yards_net_pass_att_avg <- normalize(passing_2008$yards_net_pass_att_avg)
passing_2008$sacks_pct <- normalize(passing_2008$sacks_pct)
passing_2008$comebacks <- normalize(passing_2008$comebacks)
passing_2008$game_win_drives <- normalize(passing_2008$game_win_drives)

passing_2009$age <- normalize(passing_2009$age)
passing_2009$games <- normalize(passing_2009$games)
passing_2009$games_started <- normalize(passing_2009$games_started)
passing_2009$completions <- normalize(passing_2009$completions)
passing_2009$attempts <- normalize(passing_2009$attempts)
passing_2009$comp_pct <- normalize(passing_2009$comp_pct)
passing_2009$yards_passing <- normalize(passing_2009$yards_passing)
passing_2009$td_passing <- normalize(passing_2009$td_passing)
passing_2009$td_pct <- normalize(passing_2009$td_pct)
passing_2009$interceptions <- normalize(passing_2009$interceptions)
passing_2009$int_pct <- normalize(passing_2009$int_pct)
passing_2009$long_passing <- normalize(passing_2009$long_passing)
passing_2009$yards_pass_att <- normalize(passing_2009$yards_pass_att)
passing_2009$yards_pass_att_avg <- normalize(passing_2009$yards_pass_att_avg)
passing_2009$yards_pass_comp <- normalize(passing_2009$yards_pass_comp)
passing_2009$yards_pass_game <- normalize(passing_2009$yards_pass_game)
passing_2009$pass_rate <- normalize(passing_2009$pass_rate)
passing_2009$sacks <- normalize(passing_2009$sacks)
passing_2009$sacks_pass <- normalize(passing_2009$sacks_pass)
passing_2009$yards_net_pass_att <- normalize(passing_2009$yards_net_pass_att)
passing_2009$yards_net_pass_att_avg <- normalize(passing_2009$yards_net_pass_att_avg)
passing_2009$sacks_pct <- normalize(passing_2009$sacks_pct)
passing_2009$comebacks <- normalize(passing_2009$comebacks)
passing_2009$game_win_drives <- normalize(passing_2009$game_win_drives)

passing_2010$age <- normalize(passing_2010$age)
passing_2010$games <- normalize(passing_2010$games)
passing_2010$games_started <- normalize(passing_2010$games_started)
passing_2010$completions <- normalize(passing_2010$completions)
passing_2010$attempts <- normalize(passing_2010$attempts)
passing_2010$comp_pct <- normalize(passing_2010$comp_pct)
passing_2010$yards_passing <- normalize(passing_2010$yards_passing)
passing_2010$td_passing <- normalize(passing_2010$td_passing)
passing_2010$td_pct <- normalize(passing_2010$td_pct)
passing_2010$interceptions <- normalize(passing_2010$interceptions)
passing_2010$int_pct <- normalize(passing_2010$int_pct)
passing_2010$long_passing <- normalize(passing_2010$long_passing)
passing_2010$yards_pass_att <- normalize(passing_2010$yards_pass_att)
passing_2010$yards_pass_att_avg <- normalize(passing_2010$yards_pass_att_avg)
passing_2010$yards_pass_comp <- normalize(passing_2010$yards_pass_comp)
passing_2010$yards_pass_game <- normalize(passing_2010$yards_pass_game)
passing_2010$pass_rate <- normalize(passing_2010$pass_rate)
passing_2010$sacks <- normalize(passing_2010$sacks)
passing_2010$sacks_pass <- normalize(passing_2010$sacks_pass)
passing_2010$yards_net_pass_att <- normalize(passing_2010$yards_net_pass_att)
passing_2010$yards_net_pass_att_avg <- normalize(passing_2010$yards_net_pass_att_avg)
passing_2010$sacks_pct <- normalize(passing_2010$sacks_pct)
passing_2010$comebacks <- normalize(passing_2010$comebacks)
passing_2010$game_win_drives <- normalize(passing_2010$game_win_drives)

passing_2011$age <- normalize(passing_2011$age)
passing_2011$games <- normalize(passing_2011$games)
passing_2011$games_started <- normalize(passing_2011$games_started)
passing_2011$completions <- normalize(passing_2011$completions)
passing_2011$attempts <- normalize(passing_2011$attempts)
passing_2011$comp_pct <- normalize(passing_2011$comp_pct)
passing_2011$yards_passing <- normalize(passing_2011$yards_passing)
passing_2011$td_passing <- normalize(passing_2011$td_passing)
passing_2011$td_pct <- normalize(passing_2011$td_pct)
passing_2011$interceptions <- normalize(passing_2011$interceptions)
passing_2011$int_pct <- normalize(passing_2011$int_pct)
passing_2011$long_passing <- normalize(passing_2011$long_passing)
passing_2011$yards_pass_att <- normalize(passing_2011$yards_pass_att)
passing_2011$yards_pass_att_avg <- normalize(passing_2011$yards_pass_att_avg)
passing_2011$yards_pass_comp <- normalize(passing_2011$yards_pass_comp)
passing_2011$yards_pass_game <- normalize(passing_2011$yards_pass_game)
passing_2011$pass_rate <- normalize(passing_2011$pass_rate)
passing_2011$sacks <- normalize(passing_2011$sacks)
passing_2011$sacks_pass <- normalize(passing_2011$sacks_pass)
passing_2011$yards_net_pass_att <- normalize(passing_2011$yards_net_pass_att)
passing_2011$yards_net_pass_att_avg <- normalize(passing_2011$yards_net_pass_att_avg)
passing_2011$sacks_pct <- normalize(passing_2011$sacks_pct)
passing_2011$comebacks <- normalize(passing_2011$comebacks)
passing_2011$game_win_drives <- normalize(passing_2011$game_win_drives)

passing_2012$age <- normalize(passing_2012$age)
passing_2012$games <- normalize(passing_2012$games)
passing_2012$games_started <- normalize(passing_2012$games_started)
passing_2012$completions <- normalize(passing_2012$completions)
passing_2012$attempts <- normalize(passing_2012$attempts)
passing_2012$comp_pct <- normalize(passing_2012$comp_pct)
passing_2012$yards_passing <- normalize(passing_2012$yards_passing)
passing_2012$td_passing <- normalize(passing_2012$td_passing)
passing_2012$td_pct <- normalize(passing_2012$td_pct)
passing_2012$interceptions <- normalize(passing_2012$interceptions)
passing_2012$int_pct <- normalize(passing_2012$int_pct)
passing_2012$long_passing <- normalize(passing_2012$long_passing)
passing_2012$yards_pass_att <- normalize(passing_2012$yards_pass_att)
passing_2012$yards_pass_att_avg <- normalize(passing_2012$yards_pass_att_avg)
passing_2012$yards_pass_comp <- normalize(passing_2012$yards_pass_comp)
passing_2012$yards_pass_game <- normalize(passing_2012$yards_pass_game)
passing_2012$pass_rate <- normalize(passing_2012$pass_rate)
passing_2012$sacks <- normalize(passing_2012$sacks)
passing_2012$sacks_pass <- normalize(passing_2012$sacks_pass)
passing_2012$yards_net_pass_att <- normalize(passing_2012$yards_net_pass_att)
passing_2012$yards_net_pass_att_avg <- normalize(passing_2012$yards_net_pass_att_avg)
passing_2012$sacks_pct <- normalize(passing_2012$sacks_pct)
passing_2012$comebacks <- normalize(passing_2012$comebacks)
passing_2012$game_win_drives <- normalize(passing_2012$game_win_drives)

passing_2013$age <- normalize(passing_2013$age)
passing_2013$games <- normalize(passing_2013$games)
passing_2013$games_started <- normalize(passing_2013$games_started)
passing_2013$completions <- normalize(passing_2013$completions)
passing_2013$attempts <- normalize(passing_2013$attempts)
passing_2013$comp_pct <- normalize(passing_2013$comp_pct)
passing_2013$yards_passing <- normalize(passing_2013$yards_passing)
passing_2013$td_passing <- normalize(passing_2013$td_passing)
passing_2013$td_pct <- normalize(passing_2013$td_pct)
passing_2013$interceptions <- normalize(passing_2013$interceptions)
passing_2013$int_pct <- normalize(passing_2013$int_pct)
passing_2013$long_passing <- normalize(passing_2013$long_passing)
passing_2013$yards_pass_att <- normalize(passing_2013$yards_pass_att)
passing_2013$yards_pass_att_avg <- normalize(passing_2013$yards_pass_att_avg)
passing_2013$yards_pass_comp <- normalize(passing_2013$yards_pass_comp)
passing_2013$yards_pass_game <- normalize(passing_2013$yards_pass_game)
passing_2013$pass_rate <- normalize(passing_2013$pass_rate)
passing_2013$sacks <- normalize(passing_2013$sacks)
passing_2013$sacks_pass <- normalize(passing_2013$sacks_pass)
passing_2013$yards_net_pass_att <- normalize(passing_2013$yards_net_pass_att)
passing_2013$yards_net_pass_att_avg <- normalize(passing_2013$yards_net_pass_att_avg)
passing_2013$sacks_pct <- normalize(passing_2013$sacks_pct)
passing_2013$comebacks <- normalize(passing_2013$comebacks)
passing_2013$game_win_drives <- normalize(passing_2013$game_win_drives)

# Combine all Passing seasons
passing <- rbind(passing_2004, passing_2005, passing_2006, passing_2007, passing_2008, 
                 passing_2009, passing_2010, passing_2011, passing_2012, passing_2013)

# Remove all individual Passing seasons
rm(passing_2004, passing_2005, passing_2006, passing_2007, passing_2008, 
   passing_2009, passing_2010, passing_2011, passing_2012, passing_2013)

summary(passing)

1 个答案:

答案 0 :(得分:0)

您可以做很多事情来加快速度......主要是使用plyrdplyr软件包。显然,还有进一步的调整,但这应该会显着减少你放在一起的代码量。

如果是幻想足球,我很乐意看到你的结果......:P

library(plyr)
library(dplyr)
library(XML)
library(plyr)
library(stringr)

# custom functions
normalize <- function(x) { 
  return((x - min(x)) / (max(x) - min(x)))
}

trim <- function (x) gsub("^\\s+|\\s+$", "", x)

"%w/o%" <- function(x, y) x[!x %in% y] #--  x without y

# define seasons
first_season <- 2004
last_season <- 2013
num_seasons <- as.numeric(last_season - first_season + 1)

seasons <- seq(first_season, last_season, by=1)
rm(first_season, last_season)


# Passing 
passing <- data.frame()
for (i in 1:num_seasons) {
  url <- paste("http://www.pro-football-reference.com/years/", seasons[i],"/passing.htm", sep = "")
  df <- readHTMLTable(url,which=1)
  df$season = seasons[i]
  df <- df[!names(df) %in% c("QBrec") ] 
  if(df$season >= 2008) df <- df[!names(df) %in% c("QBR") ] # Removes QBR 2008+
  passing <- rbind(passing, df)
  print(seasons[i])
}

names(passing) <- c("rank_pfr", "nameinfo", "team", "age", "games", "games_started",
                    #"qb_record", 
                    "completions", "attempts", "comp_pct", "yards_passing",
                    "td_passing", "td_pct", "interceptions", "int_pct", "long_passing",
                    "yards_pass_att", "yards_pass_att_avg", "yards_pass_comp", "yards_pass_game", "pass_rate", "sacks", "sacks_pass", "yards_net_pass_att", "yards_net_pass_att_avg", "sacks_pct", "comebacks", "game_win_drives", "season")

passing <- passing[which(passing$rank_pfr!='Rk'), ]

# Convert numeric columns to numeric
numeric_columns <- names(passing) %w/o% c('nameinfo', 'team')
numeric_columns <- passing[, numeric_columns]
numeric_columns <- data.frame(apply(numeric_columns, 2, function(x) as.numeric(as.character(x))))
numeric_columns[is.na(numeric_columns)] <- 0
numeric_columns <- plyr::ddply(numeric_columns, .(season), colwise(normalize))
passing <- data.frame(nameinfo = passing$nameinfo,
                      team = passing$team) %>%
  cbind(numeric_columns)

summary(passing)