用数据框中的NA替换字符值

时间:2010-07-28 21:42:56

标签: r dataframe na

我有一个数据框,其中(在随机位置)包含某个字符值(比如"foo"),我想把它替换为NA。

在整个数据框中这样做的最佳方式是什么?

7 个答案:

答案 0 :(得分:77)

此:

df[ df == "foo" ] <- NA

答案 1 :(得分:65)

将这一点扼杀在萌芽状态的一种方法是在首先读取数据时将该字符转换为NA。

df <- read.csv("file.csv", na.strings = c("foo", "bar"))

答案 2 :(得分:4)

另一个选项是is.na<-

is.na(df) <- df == "foo"

请注意,它的用法可能看起来有点违反直觉,但它实际上是将NA值赋给df中由右侧索引指定的位置。

答案 3 :(得分:2)

使用dplyr::na_if,可以将特定值替换为NA。在这种情况下,应该是"foo"

library(dplyr)
set.seed(1234)

df <- data.frame(
  id = 1:6,
  x = sample(c("a", "b", "foo"), 6, replace = T),
  y = sample(c("c", "d", "foo"), 6, replace = T),
  z = sample(c("e", "f", "foo"), 6, replace = T),
  stringsAsFactors = F
)
df
#>   id   x   y   z
#> 1  1   a   c   e
#> 2  2   b   c foo
#> 3  3   b   d   e
#> 4  4   b   d foo
#> 5  5 foo foo   e
#> 6  6   b   d   e

na_if(df$x, "foo")
#> [1] "a" "b" "b" "b" NA  "b"

如果您需要对多列进行此操作,则可以通过mutate_at将"foo"传递给na_if:

df %>%
  mutate_at(vars(x, y, z), na_if, "foo")
#>   id    x    y    z
#> 1  1    a    c    e
#> 2  2    b    c <NA>
#> 3  3    b    d    e
#> 4  4    b    d <NA>
#> 5  5 <NA> <NA>    e
#> 6  6    b    d    e

答案 4 :(得分:1)

可以使用dplyr::mutate_all()和replace完成此操作:

library(dplyr)
df <- data_frame(a = c('foo', 2, 3), b = c(1, 'foo', 3), c = c(1,2,'foobar'),  d = c(1, 2, 3))

> df
# A tibble: 3 x 4
     a     b      c     d
  <chr> <chr>  <chr> <dbl>
1   foo     1      1     1
2     2   foo      2     2
3     3     3 foobar     3


df <- mutate_all(df, funs(replace(., .=='foo', NA)))

> df
# A tibble: 3 x 4
      a     b      c     d
  <chr> <chr>  <chr> <dbl>
1  <NA>     1      1     1
2     2  <NA>      2     2
3     3     3 foobar     3

另一个dplyr选项是:

df <- na_if(df, 'foo') 

答案 5 :(得分:0)

另一种解决方法如下:

# Walk every column of DF and overwrite empty strings and NA entries with "ALL".
# seq_len() is used instead of 1:ncol(DF) so a zero-column frame is a no-op
# rather than iterating over c(1, 0).
for (i in seq_len(ncol(DF))) {
  # which() drops NA comparison results, so only true "" matches are indexed.
  DF[which(DF[, i] == ""), i] <- "ALL"
  # Same column, same data frame: replace the missing values as well.
  DF[which(is.na(DF[, i])), i] <- "ALL"
}

答案 6 :(得分:0)

# NOTE: answer text that preceded this script on the original page (verbatim):
#   假设您不知道列名或要选择的列数很多,
import numpy as np
import scipy.special

try:
    import matplotlib.pyplot as plt
except ImportError:  # pragma: no cover
    # Plotting is optional: the geometry helpers below only need numpy/scipy.
    plt = None

show_animation = True


def _require_pyplot():
    """Raise a clear error when a plotting helper is used without matplotlib."""
    if plt is None:
        raise ImportError("matplotlib is required for plotting")


def calc_4points_bezier_path(sx, sy, syaw, ex, ey, eyaw, offset):
    """
    Compute control points and path given start and end position.

    The two inner control points are placed along the start and end headings
    at a distance of ``euclidean_distance(start, end) / offset``.

    :param sx: (float) x-coordinate of the starting point
    :param sy: (float) y-coordinate of the starting point
    :param syaw: (float) yaw angle at start
    :param ex: (float) x-coordinate of the ending point
    :param ey: (float) y-coordinate of the ending point
    :param eyaw: (float) yaw angle at the end
    :param offset: (float) divisor applied to the start-end distance
    :return: (numpy array, numpy array) the 100-point path and the four
        control points
    """
    dist = np.sqrt((sx - ex) ** 2 + (sy - ey) ** 2) / offset
    control_points = np.array(
        [[sx, sy],
         [sx + dist * np.cos(syaw), sy + dist * np.sin(syaw)],
         [ex - dist * np.cos(eyaw), ey - dist * np.sin(eyaw)],
         [ex, ey]])

    path = calc_bezier_path(control_points, n_points=100)

    return path, control_points


def calc_bezier_path(control_points, n_points=100):
    """
    Compute bezier path (trajectory) given control points.

    :param control_points: (numpy array)
    :param n_points: (int) number of points in the trajectory
    :return: (numpy array) one row per sample of t, evenly spaced in [0, 1]
    """
    return np.array([bezier(t, control_points)
                     for t in np.linspace(0, 1, n_points)])


def bernstein_poly(n, i, t):
    """
    Bernstein polynomial.

    :param n: (int) polynomial degree
    :param i: (int) index of the basis polynomial
    :param t: (float)
    :return: (float) ``comb(n, i) * t**i * (1 - t)**(n - i)``
    """
    return scipy.special.comb(n, i) * t ** i * (1 - t) ** (n - i)


def bezier(t, control_points):
    """
    Return one point on the bezier curve.

    :param t: (float) number in [0, 1]
    :param control_points: (numpy array)
    :return: (numpy array) Coordinates of the point
    """
    n = len(control_points) - 1
    return np.sum([bernstein_poly(n, i, t) * control_points[i]
                   for i in range(n + 1)], axis=0)


def bezier_derivatives_control_points(control_points, n_derivatives):
    """
    Compute control points of the successive derivatives of a given bezier curve.

    A derivative of a bezier curve is a bezier curve.
    See https://pomax.github.io/bezierinfo/#derivatives
    for detailed explanations

    :param control_points: (numpy array)
    :param n_derivatives: (int)
    e.g., n_derivatives=2 -> compute control points for first and second derivatives
    :return: (dict) maps derivative order to its control points; key 0 holds
        the control points that were passed in
    """
    w = {0: control_points}
    for i in range(n_derivatives):
        n = len(w[i])
        w[i + 1] = np.array([(n - 1) * (w[i][j + 1] - w[i][j])
                             for j in range(n - 1)])
    return w


def curvature(dx, dy, ddx, ddy):
    """
    Compute curvature at one point given first and second derivatives.

    The denominator vanishes when both first derivatives are zero, so the
    result is undefined there.

    :param dx: (float) First derivative along x axis
    :param dy: (float) First derivative along y axis
    :param ddx: (float) Second derivative along x axis
    :param ddy: (float) Second derivative along y axis
    :return: (float)
    """
    return (dx * ddy - dy * ddx) / (dx ** 2 + dy ** 2) ** (3 / 2)


def plot_arrow(x, y, yaw, length=1.0, width=0.5, fc="r", ec="k"):  # pragma: no cover
    """
    Plot one arrow, or one arrow per element when given sequences.

    :param x: (float or sequence) x-coordinate(s) of the arrow tail
    :param y: (float or sequence) y-coordinate(s) of the arrow tail
    :param yaw: (float or sequence) heading(s) of the arrow
    :param length: (float) arrow length
    :param width: (float) used for both head width and head length
    :param fc: face colour passed to matplotlib
    :param ec: edge colour passed to matplotlib
    """
    if not isinstance(x, (int, float, np.number)):
        for (ix, iy, iyaw) in zip(x, y, yaw):
            # Forward the styling so every arrow matches the caller's request.
            plot_arrow(ix, iy, iyaw, length=length, width=width, fc=fc, ec=ec)
    else:
        _require_pyplot()
        plt.arrow(x, y, length * np.cos(yaw), length * np.sin(yaw),
                  fc=fc, ec=ec, head_width=width, head_length=width)
        plt.plot(x, y)


def main():
    """Plot an example bezier curve."""
    start_x = 10.0  # [m]
    start_y = 1.0  # [m]
    start_yaw = np.radians(180.0)  # [rad]

    end_x = -0.0  # [m]
    end_y = -3.0  # [m]
    end_yaw = np.radians(-45.0)  # [rad]
    offset = 3.0

    path, control_points = calc_4points_bezier_path(
        start_x, start_y, start_yaw, end_x, end_y, end_yaw, offset)

    # Note: alternatively, instead of specifying start and end position
    # you can directly define n control points and compute the path:
    # control_points = np.array([[5., 1.], [-2.78, 1.], [-11.5, -4.5], [-6., -8.]])
    # path = calc_bezier_path(control_points, n_points=100)

    # Display the tangent, normal and radius of curvature at a given point
    t = 0.86  # Number in [0, 1]
    x_target, y_target = bezier(t, control_points)
    derivatives_cp = bezier_derivatives_control_points(control_points, 2)
    point = bezier(t, control_points)
    dt = bezier(t, derivatives_cp[1])
    ddt = bezier(t, derivatives_cp[2])
    # Radius of curvature
    radius = 1 / curvature(dt[0], dt[1], ddt[0], ddt[1])
    # Normalize derivative
    dt /= np.linalg.norm(dt, 2)
    tangent = np.array([point, point + dt])
    normal = np.array([point, point + [- dt[1], dt[0]]])
    curvature_center = point + np.array([- dt[1], dt[0]]) * radius

    assert path.T[0][0] == start_x, "path is invalid"
    assert path.T[1][0] == start_y, "path is invalid"
    assert path.T[0][-1] == end_x, "path is invalid"
    assert path.T[1][-1] == end_y, "path is invalid"

    if show_animation:  # pragma: no cover
        _require_pyplot()
        # The circle is only needed for drawing, so build it here rather than
        # before the checks above.
        circle = plt.Circle(tuple(curvature_center), radius,
                            color=(0, 0.8, 0.8), fill=False, linewidth=1)
        fig, ax = plt.subplots()
        ax.plot(path.T[0], path.T[1], label="Cubic Bezier Path")
        ax.plot(control_points.T[0], control_points.T[1],
                '--o', label="Control Points")
        ax.plot(x_target, y_target)
        ax.plot(tangent[:, 0], tangent[:, 1], label="Tangent")
        ax.plot(normal[:, 0], normal[:, 1], label="Normal")
        ax.add_artist(circle)
        plot_arrow(start_x, start_y, start_yaw)
        plot_arrow(end_x, end_y, end_yaw)
        plt.xlabel('X')
        plt.ylabel('Y')
        ax.legend()
        ax.axis("equal")
        ax.grid(True)
        for i, p in enumerate(control_points):
            ax.annotate(f'P{i:d}', xy=p, xytext=(0, 5),
                        textcoords='offset points', ha='center')
        plt.show()


if __name__ == '__main__':
    main()
# NOTE: answer text that followed this script on the original page (verbatim):
#   可能会有用。

is.character()