Question

数据链接（来自 Dropbox）：badfitting。我尝试使用 curve_fit 将数据拟合到 Python 中一个预先定义的函数，但结果远不理想。代码很简单，如下所示。我不知道哪里出了问题。由于我是 Python 新手，想请教：是否还有其他适用于预定义函数的优化或拟合方法？

谢谢！

import numpy as np
import math
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit

def func(x, r1, r2, r3,l,c):
    """Return the magnitude ``sqrt(m**2 + n**2)`` of the model at ``x``.

    The author describes the model as an LCR circuit.  ``x`` is converted to
    an angular frequency ``2*pi*x`` and two components are evaluated
    (presumably the real and imaginary parts of the impedance -- confirm
    against the circuit).

    Args:
        x: Frequency; a scalar or a NumPy array (evaluated element-wise).
        r1, r2, r3, l, c: Model parameters.

    Returns:
        The magnitude, with the same shape as ``x``.
    """
    omega = 2*math.pi*x
    # Both components share these two denominators.
    denom_rl = r2**2+l**2*omega**2
    denom_rc = 1+r3*c**2*omega**2
    component_m = r1+(r2*l*omega)/denom_rl+r3/denom_rc
    component_n = (r2**2*l*omega)/denom_rl-r3**3*c*omega/denom_rc
    return (component_m**2+component_n**2)**.5

def readdata(filename):
    """Read one floating-point number per line from an open text file.

    Args:
        filename: Despite its name, an already-open file object (anything
            providing ``readlines()``), not a path.

    Returns:
        list[float]: The parsed values, in file order.

    Raises:
        ValueError: If a non-blank line is not a valid float literal.
    """
    stripped = (line.strip() for line in filename.readlines())
    # Skip blank lines (e.g. an empty last line) instead of failing on
    # float("").
    return [float(text) for text in stripped if text]

 # test data
# Load the x samples (fre.txt) and y samples (impedance.txt); the context
# managers close each file as soon as it has been read.
with open(r'C:\Users\adm\Desktop\simpletry\fre.txt') as f_x:
    xdata = readdata(f_x)

with open(r'C:\Users\adm\Desktop\simpletry\impedance.txt') as f_y:
    ydata = readdata(f_y)

xdata = np.array(xdata)
ydata = np.array(ydata)
plt.semilogx(xdata, ydata, 'b-', label='data')

# Fit with every parameter constrained to be non-negative.  No initial guess
# (p0) is supplied, so curve_fit picks its own default starting point.
popt, pcov = curve_fit(func, xdata, ydata, bounds=((0, 0, 0, 0, 0), (np.inf, np.inf, np.inf, np.inf, np.inf)))
plt.semilogx(xdata, func(xdata, *popt), 'r-', label='fitted curve')

print(popt)
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.show()

您猜对了，这是一个 LCR 电路模型。现在我想用同一组参数同时拟合两条曲线：

def func1(x, r1, r2, r3,l,c):
    """Return the ``m`` component of the model at frequency ``x``.

    This is the same expression that ``func`` uses for ``m`` before taking
    the magnitude.

    Args:
        x: Frequency; a scalar or a NumPy array (evaluated element-wise).
        r1, r2, r3, l, c: Model parameters.

    Returns:
        The ``m`` component, with the same shape as ``x``.
    """
    w=2*math.pi*x
    m=r1+(r2*l*w)/(r2**2+l**2*w**2)+r3/(1+r3*c**2*w**2)
    return m

def func2(x, r1, r2, r3,l,c):
    """Return the ``n`` component of the model at frequency ``x``.

    This is the same expression that ``func`` uses for ``n`` before taking
    the magnitude.  ``r1`` does not appear in it; the parameter is kept so
    that the signature matches ``func1``.

    Args:
        x: Frequency; a scalar or a NumPy array (evaluated element-wise).
        r1, r2, r3, l, c: Model parameters.

    Returns:
        The ``n`` component, with the same shape as ``x``.
    """
    w=2*math.pi*x
    n=(r2**2*l*w)/(r2**2+l**2*w**2)-r3**3*c*w/(1+r3*c**2*w**2)
    return n

是否可以使用 curve_fit 来同时优化这些参数？

Answer 1

下面是我的结果：我使用 scipy 的 differential_evolution（差分进化）遗传算法模块为 curve_fit 生成初始参数估计值，并在函数中加入了一道简单的“砖墙”，以确保所有参数均为正数。Scipy 的差分进化实现使用拉丁超立方（Latin Hypercube）算法来确保对参数空间进行充分搜索，这需要给定搜索的边界——在此示例中，这些边界取自数据的最大值和最小值。我的结果：

RMSE：7.415

R平方：0.999995

r1 = 1.16614005e+00

r2 = 2.00000664e+05

r3 = 1.54718886e+01

l = 1.94473531e+04

c = 4.32515535e+05

import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.optimize import differential_evolution
import warnings

def func(x, r1, r2, r3,l,c):
    """Return the model magnitude at ``x``, or a penalty for negative parameters.

    Args:
        x: Frequency; a scalar or a NumPy array (evaluated element-wise).
        r1, r2, r3, l, c: Model parameters.

    Returns:
        ``sqrt(m**2 + n**2)`` with the same shape as ``x``, or the scalar
        ``1.0E10`` when any parameter is negative.
    """
    # "brick wall": a negative parameter yields a huge constant, hence a huge
    # error, which steers the optimiser back towards positive values
    if any(p < 0.0 for p in (r1, r2, r3, l, c)):
        return 1.0E10

    omega = 2*numpy.pi*x
    # Both components share these two denominators.
    denom_rl = r2**2+l**2*omega**2
    denom_rc = 1+r3*c**2*omega**2
    component_m = r1+(r2*l*omega)/denom_rl+r3/denom_rc
    component_n = (r2**2*l*omega)/denom_rl-r3**3*c*omega/denom_rc
    return (component_m**2+component_n**2)**.5


def readdata(filename):
    """Read one floating-point number per line from an open text file.

    Args:
        filename: Despite its name, an already-open file object (anything
            providing ``readlines()``), not a path.

    Returns:
        list[float]: The parsed values, in file order.

    Raises:
        ValueError: If a non-blank line is not a valid float literal.
    """
    stripped = (line.strip() for line in filename.readlines())
    # Skip blank lines (e.g. an empty last line) instead of failing on
    # float("").
    return [float(text) for text in stripped if text]

 # test data
# Load the x samples (fre.txt) and y samples (impedance.txt); the context
# managers close each file as soon as it has been read.
with open('/home/zunzun/temp/data/fre.txt') as f_x:
    xData = readdata(f_x)

with open('/home/zunzun/temp/data/impedance.txt') as f_y:
    yData = readdata(f_y)

xData = numpy.array(xData)
yData = numpy.array(yData)


# function for genetic algorithm to minimize (sum of squared error)
def sumOfSquaredError(parameterTuple):
    """Return the sum of squared residuals of ``func`` against the data.

    Args:
        parameterTuple: Sequence of model parameters, unpacked into ``func``
            after ``xData``.

    Returns:
        The sum over all points of
        ``(yData - func(xData, *parameterTuple)) ** 2``.
    """
    # Suppress warnings only for this evaluation, so that warnings raised
    # elsewhere in the program (e.g. by curve_fit) remain visible.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        val = func(xData, *parameterTuple)
        return numpy.sum((yData - val) ** 2.0)


def generate_Initial_Parameters():
    """Estimate starting parameters for the fit with differential evolution.

    All five parameters (r1, r2, r3, l, c) are searched over the same
    interval, which runs from the smallest to the largest value found in
    either ``xData`` or ``yData``.

    Returns:
        The best parameter vector found (``result.x``).
    """
    lowest = min(min(xData), min(yData))
    highest = max(max(xData), max(yData))

    # one [low, high] search interval per parameter: r1, r2, r3, l, c
    parameterBounds = [[lowest, highest] for _ in range(5)]

    # fixed seed so that repeated runs give repeatable results
    result = differential_evolution(sumOfSquaredError, parameterBounds, seed=3)
    return result.x

# Stage 1 - global search.  By default differential_evolution finishes by
# polishing its best candidate with a local minimiser (polish=True), still
# within the search bounds.
geneticParameters = generate_Initial_Parameters()

# Stage 2 - local refinement.  The genetic algorithm's bounds are not passed
# to curve_fit, just in case the best-fit parameters lie outside them.
fittedParameters, pcov = curve_fit(func, xData, yData, p0=geneticParameters)
print('Fitted parameters:', fittedParameters, end='\n\n')

# Goodness-of-fit statistics
modelPredictions = func(xData, *fittedParameters)
absError = modelPredictions - yData  # signed residuals, model minus data

SE = numpy.square(absError)  # squared errors
MSE = numpy.mean(SE)  # mean squared error
RMSE = numpy.sqrt(MSE)  # root mean squared error
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))

print('\nRMSE:', RMSE)
print('R-squared:', Rsquared, end='\n\n')


##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    """Show the data and the fitted model on a plot with a logarithmic x axis.

    Reads the module-level ``xData``, ``yData`` and ``fittedParameters``.

    Args:
        graphWidth: Figure width in pixels (divided by 100 and drawn at
            100 dpi).
        graphHeight: Figure height in pixels (same conversion).
    """
    size_in_inches = (graphWidth/100.0, graphHeight/100.0)
    fig = plt.figure(figsize=size_in_inches, dpi=100)
    axes = fig.add_subplot(111)

    # raw data as 'D' markers, then the fitted equation as a line
    plt.semilogx(xData, yData, 'D')
    plt.semilogx(xData, func(xData, *fittedParameters))

    axes.set_xlabel('X Data')
    axes.set_ylabel('Y Data')

    plt.show()
    plt.close('all')  # closes every open pyplot figure, not only this one

# Requested figure size; ModelAndScatterPlot divides both values by 100 and
# draws at 100 dpi.
graphWidth, graphHeight = 800, 600
ModelAndScatterPlot(graphWidth, graphHeight)

Answer 2

要使最小二乘回归有意义，您至少必须提供有意义的初始参数。

由于默认情况下所有参数的初始值均为 1，对初始回归影响最大的将是电阻 r1，它会给整体结果加上一个常数项。

很可能您最终会遇到以下配置：

popt
Out[241]: 
array([1.66581563e+03, 2.43663552e+02, 1.13019744e+00, 1.20233767e+00,
       5.04984535e-04])

这会输出一条平直的水平线，因为 m = 很大的值 + ~0 + ~0，n = ~0 - ~0，所以 y ≈ r1。

但是，如果您对参数的初始化有所不同，

# Same bounded fit, but started from an explicit initial guess; the p0
# entries follow func's parameter order: r1, r2, r3, l, c.
popt, pcov = curve_fit(
    func,
    xdata.flatten(),
    ydata.flatten(),
    p0=[0.1,1e5,1000,1000,0.2],
    bounds=((0, 0, 0, 0, 0), (np.inf, np.inf, np.inf, np.inf, np.inf)),
)

您会得到一个看起来合理得多的拟合结果：

popt
Out[244]: 
array([1.14947146e+00, 4.12512324e+05, 1.36182466e+02, 8.29771756e+04,
       1.77593448e+03])

((fitted-ydata.flatten())**2).mean()
Out[257]: 0.6099524982664816
# MSE ≈ 0.61, hence RMSE ≈ 0.78

P.S. 由于 pd.read_clipboard 的转换问题，第一行被当成了表头而不是数据，所以我的数据是从第二个数据点开始的。不过这应该不会改变整体结论。

python curve_fitting效果不好

2 个答案: