请教一个线上问题,流量早高峰时,接口耗时增多,前端请求 pending,应该如何分析以及定位问题。

29 天前
 silence01
目前服务端是两台 8C32G 机器,一台机器带宽比较大,承载主要流量,这台机器上有个 nginx 容器以及多个 springboot 项目容器,nginx 主要做反向代理以及负载均衡。
通过阿里云监控可以看到在流量高峰时,tcp 连接数,正在建立连接的数量飙高。如下图
https://imgur.com/a/wBov3gq
nginx 的报错日志显示 worker_connections are not enough ,
然后高峰期接口耗时日志明显增加,请问,在这种情况下应该如何分析以及定位问题?试过调高 worker_connections ,但是不起作用。另外感觉目前的服务端架构不太合理,流量入口是在一台机器上,系统瓶颈还是在一台机器上,应该怎么优化呢。
还请各位大佬不吝赐教。
1023 次点击
所在节点    Java
6 条回复
CodeWind
29 天前
贴一下 nginx 容器的配置,以及 nginx 的配置信息。然后建议将 Nginx 容器的网络模式改为 HostNetwork 试试。
silence01
29 天前
@CodeWind
nginx 容器配置信息

[
{
"Id": "c0ea1b73f4a4183219515743d236f20934b7b80ae95d6107b5b71f84a9973f43",
"Created": "2023-12-27T08:26:37.928226351Z",
"Path": "/docker-entrypoint.sh",
"Args": [
"nginx",
"-g",
"daemon off;"
],
"State": {
"Status": "running",
"Running": true,
"Paused": false,
"Restarting": false,
"OOMKilled": false,
"Dead": false,
"Pid": 28784,
"ExitCode": 0,
"Error": "",
"StartedAt": "2023-12-27T08:26:38.109869375Z",
"FinishedAt": "0001-01-01T00:00:00Z"
},
"Image": "sha256:a99a39d070bfd1cb60fe65c45dea3a33764dc00a9546bf8dc46cb5a11b1b50e9",
"ResolvConfPath": "/var/lib/docker/containers/c0ea1b73f4a4183219515743d236f20934b7b80ae95d6107b5b71f84a9973f43/resolv.conf",
"HostnamePath": "/var/lib/docker/containers/c0ea1b73f4a4183219515743d236f20934b7b80ae95d6107b5b71f84a9973f43/hostname",
"HostsPath": "/var/lib/docker/containers/c0ea1b73f4a4183219515743d236f20934b7b80ae95d6107b5b71f84a9973f43/hosts",
"LogPath": "",
"Name": "/nginx",
"RestartCount": 0,
"Driver": "overlay2",
"MountLabel": "",
"ProcessLabel": "",
"AppArmorProfile": "",
"ExecIDs": [
"6d5fa77c7040de4f287c63225ef28a517878a61f310bb179184e13f1196d216f",
"2cd8e8e9d77c0b3aec39adcc8e4a2b153d174ad060ec71018c97c5f2b24f1205",
"86e0bb5b5eeadf7df2809d5a778f332eb01e20518420ae3c82c097798914125d",
"01f17216c739f84790c190833342f54c6025a4a1bd2c8013dd61e5979f4ce461",
"efcada90dbba7550b372930560ed1d18cf1b10d91f5c08519b1c01ad7af4ea71",
"54333782d48cf040258fb14d5e2739156484aab3026dc2720df8ec1cfbf99245",
"465102c14c5ff464eec011fce630ea553fcd09cb50d9b34a854293a750e4b614",
"a1ec1c96827ce60bdfa4c68d165820368834c7c5c00d2a6c24d8cca574f16a71",
"8fc228b0db75742e61f36784b6a4a44651f3a14ee629a25e833586f6fc868f6c"
],
"HostConfig": {
"Binds": null,
"ContainerIDFile": "",
"LogConfig": {
"Type": "journald",
"Config": {}
},
"NetworkMode": "default",
"PortBindings": {
"443/tcp": [
{
"HostIp": "",
"HostPort": "443"
}
],
"80/tcp": [
{
"HostIp": "",
"HostPort": "80"
}
]
},
"RestartPolicy": {
"Name": "no",
"MaximumRetryCount": 0
},
"AutoRemove": false,
"VolumeDriver": "",
"VolumesFrom": null,
"CapAdd": null,
"CapDrop": null,
"Dns": [],
"DnsOptions": [],
"DnsSearch": [],
"ExtraHosts": null,
"GroupAdd": null,
"IpcMode": "",
"Cgroup": "",
"Links": null,
"OomScoreAdj": 0,
"PidMode": "",
"Privileged": false,
"PublishAllPorts": false,
"ReadonlyRootfs": false,
"SecurityOpt": null,
"UTSMode": "",
"UsernsMode": "",
"ShmSize": 67108864,
"Runtime": "docker-runc",
"ConsoleSize": [
0,
0
],
"Isolation": "",
"CpuShares": 0,
"Memory": 0,
"NanoCpus": 0,
"CgroupParent": "",
"BlkioWeight": 0,
"BlkioWeightDevice": null,
"BlkioDeviceReadBps": null,
"BlkioDeviceWriteBps": null,
"BlkioDeviceReadIOps": null,
"BlkioDeviceWriteIOps": null,
"CpuPeriod": 0,
"CpuQuota": 0,
"CpuRealtimePeriod": 0,
"CpuRealtimeRuntime": 0,
"CpusetCpus": "",
"CpusetMems": "",
"Devices": [],
"DiskQuota": 0,
"KernelMemory": 0,
"MemoryReservation": 0,
"MemorySwap": 0,
"MemorySwappiness": -1,
"OomKillDisable": false,
"PidsLimit": 0,
"Ulimits": null,
"CpuCount": 0,
"CpuPercent": 0,
"IOMaximumIOps": 0,
"IOMaximumBandwidth": 0
},
"GraphDriver": {
"Name": "overlay2",
"Data": {
"LowerDir": "/var/lib/docker/overlay2/91e3a4840e9b70c770780d76ddc290ca8c13c6aa6cc486a3f4e4733d0c0c5f22-init/diff:/var/lib/docker/overlay2/5d91f3f600ca558808844dfde84be684d870904ae0f90550b3ec02e8abda0177/diff:/var/lib/docker/overlay2/fcb79371f98a9e2cb86cbc89aee92c140fdd070783e59fc9ee387a10723e3a0c/diff:/var/lib/docker/overlay2/8c7c190ee5730833267d5fc47eca96cf9554f396e578dcdc71402ad420302c43/diff:/var/lib/docker/overlay2/e8d8fc9899cf6baa778175efa5bb3d51c9a99e51025c706f3fcbb0e914a32afe/diff:/var/lib/docker/overlay2/b20b67ff2f5dc33991a222d6590faad8e21eb2d017654423830763c89dbc431b/diff:/var/lib/docker/overlay2/77778c655de26a0011d507926f9f60e1df7da18b901028374625be538600fefe/diff",
"MergedDir": "/var/lib/docker/overlay2/91e3a4840e9b70c770780d76ddc290ca8c13c6aa6cc486a3f4e4733d0c0c5f22/merged",
"UpperDir": "/var/lib/docker/overlay2/91e3a4840e9b70c770780d76ddc290ca8c13c6aa6cc486a3f4e4733d0c0c5f22/diff",
"WorkDir": "/var/lib/docker/overlay2/91e3a4840e9b70c770780d76ddc290ca8c13c6aa6cc486a3f4e4733d0c0c5f22/work"
}
},
"Mounts": [],
"Config": {
"Hostname": "c0ea1b73f4a4",
"Domainname": "",
"User": "",
"AttachStdin": false,
"AttachStdout": false,
"AttachStderr": false,
"ExposedPorts": {
"443/tcp": {},
"80/tcp": {}
},
"Tty": false,
"OpenStdin": false,
"StdinOnce": false,
"Env": [
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"NGINX_VERSION=1.23.3",
"NJS_VERSION=0.7.9",
"PKG_RELEASE=1~bullseye"
],
"Cmd": [
"nginx",
"-g",
"daemon off;"
],
"Image": "nginx",
"Volumes": null,
"WorkingDir": "",
"Entrypoint": [
"/docker-entrypoint.sh"
],
"OnBuild": null,
"Labels": {
"maintainer": "NGINX Docker Maintainers <docker-maint@nginx.com>"
},
"StopSignal": "SIGQUIT"
},
"NetworkSettings": {
"Bridge": "",
"SandboxID": "48f11bccefb8b41c1586a404810af3b4bccbd2225ea61d52e0c0e737f9c1968d",
"HairpinMode": false,
"LinkLocalIPv6Address": "",
"LinkLocalIPv6PrefixLen": 0,
"Ports": {
"443/tcp": [
{
"HostIp": "0.0.0.0",
"HostPort": "443"
}
],
"80/tcp": [
{
"HostIp": "0.0.0.0",
"HostPort": "80"
}
]
},
"SandboxKey": "/var/run/docker/netns/48f11bccefb8",
"SecondaryIPAddresses": null,
"SecondaryIPv6Addresses": null,
"EndpointID": "e75e3c0b5836cc4532127e712e60f157d35167f82c5a2befd608c48622fd90d2",
"Gateway": "172.17.0.1",
"GlobalIPv6Address": "",
"GlobalIPv6PrefixLen": 0,
"IPAddress": "172.17.0.4",
"IPPrefixLen": 16,
"IPv6Gateway": "",
"MacAddress": "02:42:ac:11:00:04",
"Networks": {
"bridge": {
"IPAMConfig": null,
"Links": null,
"Aliases": null,
"NetworkID": "54c36131e69a5d0f69f965f393b8c0c260df3ee18e5e62be482e7322186b86ba",
"EndpointID": "e75e3c0b5836cc4532127e712e60f157d35167f82c5a2befd608c48622fd90d2",
"Gateway": "172.17.0.1",
"IPAddress": "172.17.0.4",
"IPPrefixLen": 16,
"IPv6Gateway": "",
"GlobalIPv6Address": "",
"GlobalIPv6PrefixLen": 0,
"MacAddress": "02:42:ac:11:00:04"
}
}
}
}
]

nginx 配置信息:
user nginx;
worker_processes auto;

#error_log /var/log/nginx/error.log notice;
# error_log /etc/nginx/conf.d/error.log notice;

pid /var/run/nginx.pid;


events {
worker_connections 1224;
}


http {
include /etc/nginx/mime.types;
default_type application/octet-stream;


log_format main '$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for" $request_time $upstream_response_time';

access_log /var/log/nginx/access.log main;
error_log /var/log/nginx/error.log error;
rewrite_log on;

sendfile on;
#tcp_nopush on;

keepalive_timeout 65;

#gzip on;

include /etc/nginx/conf.d/*.conf;
}
fkdtz
29 天前
你都说高峰期接口耗时日志明显增加,建议先查后端逻辑。
有可能后端因为某种原因响应很慢,导致 Nginx worker 无法快速响应前端,直到把 worker 全都占满。
如果是这种情况的话,问题出在后端,调高负载均衡配置不解决根本问题。
CodeWind
29 天前
@silencechengk 需要再看一下宿主机和 Nginx 容器内 ulimit 和最大打开文件数设置,如果这两个参数小于 worker_connections 值,调大 worker_connections 参数无意义。
CodeWind
29 天前
宿主机的 ulimit 参数建议要大于容器,小于就没有意义。另外你也打印了 upstream_response_time 日志,可以根据日志筛选接口的响应时间,对比一下其他时间段。如果发现高峰期响应时间明显上升,那就是后端代码的问题,如果发现变化不大,那就是 nginx 的问题。
BiChengfei
28 天前
worker_connections are not enough 没遇到过,也没设置过
从“流量早高峰时,接口耗时增多,前端请求 pending”分析
1. 频繁创建销毁线程,导致 GC 频繁,一般是 Young GC ,导致耗时变长,可以监控下 GC 频率
2. 有线程共享资源竞争情况,导致线程等待,导致耗时变长
这两点都是从减少接口耗时,及时释放资源,来解决连接数不足的问题

这是一个专为移动设备优化的页面(即为了让你能够在 Google 搜索结果里秒开这个页面),如果你希望参与 V2EX 社区的讨论,你可以继续到 V2EX 上打开本讨论主题的完整版本。

https://www.v2ex.com/t/1033076

V2EX 是创意工作者们的社区,是一个分享自己正在做的有趣事物、交流想法,可以遇见新朋友甚至新机会的地方。

V2EX is a community of developers, designers and creative people.

© 2021 V2EX