Server side

A FastAPI app that asks ZhipuAI's OpenAI-compatible GLM endpoint for a streamed chat completion and relays it to the caller line by line:
import uvicorn
from openai import AsyncOpenAI
from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse

client = AsyncOpenAI(
    api_key="",  # fill in your API key
    base_url="https://open.bigmodel.cn/api/paas/v4/",
)
MODEL = "glm-4-flash"

app = FastAPI()

@app.get("/stream")
async def stream_endpoint(request: Request):
    async def response_stream():
        # With AsyncOpenAI, create() must be awaited before iterating
        # over the resulting stream.
        completion = await client.chat.completions.create(
            model=MODEL,
            messages=[
                {"role": "system", "content": "You are a clever and highly creative fiction writer."},
                {"role": "user", "content": "As a master of fairy tales, please write a short fairy tale."},
            ],
            top_p=0.7,
            temperature=0.9,
            stream=True,
        )
        partition = []
        async for chunk in completion:
            delta = chunk.choices[0].delta.content
            if delta is None:  # the final chunk carries no content
                continue
            partition.append(delta)
            # Yield the accumulated text so far as one newline-terminated
            # line per chunk; the client sees progressively longer lines.
            yield "".join(partition) + "\n"
    return StreamingResponse(response_stream(), media_type="text/plain")

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
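Note that response_stream re-sends the whole accumulated text on every chunk, so the bytes on the wire grow quadratically with the length of the story. A minimal sketch of a leaner variant (my addition, not part of the original code; the delta_stream and /stream-delta names are hypothetical, and it reuses client, MODEL, app, and StreamingResponse from above) yields only each new fragment:

# Delta-only variant: forward each fragment instead of the whole buffer.
# Assumes client, MODEL, app, and StreamingResponse defined above.
async def delta_stream():
    completion = await client.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "user", "content": "Please write a short fairy tale."},
        ],
        stream=True,
    )
    async for chunk in completion:
        delta = chunk.choices[0].delta.content
        if delta:
            yield delta  # only the new text

@app.get("/stream-delta")
async def stream_delta_endpoint():
    return StreamingResponse(delta_stream(), media_type="text/plain")

With this shape the caller reads raw chunks (e.g. r.iter_content) and concatenates them itself, whereas the original cumulative shape lets a line-oriented client simply print the latest line.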
How to call it

Consume the stream with requests, printing each line as soon as it arrives:
import requests
with requests.get('http://localhost:8000/stream', stream=True) as r:
    for line in r.iter_lines():
        if line:
            decoded_line = line.decode('utf-8')
            print(decoded_line)
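If the caller itself runs inside an event loop, the same loop works asynchronously. A minimal sketch, assuming the httpx package (not used in the original code):

import asyncio
import httpx

async def main():
    # Stream the endpoint and print each line as it arrives,
    # without blocking the event loop.
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream('GET', 'http://localhost:8000/stream') as r:
            async for line in r.aiter_lines():
                if line:
                    print(line)

asyncio.run(main())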