from io import BytesIO
import json
import re

import boto3
from boto3.dynamodb.conditions import Key, Attr
import pdfplumber
import requests


def lambda_handler(event, context):
    """Download a fixed PDF, extract its text and store it in S3 as JSON.

    The PDF is fetched over HTTP, the text of every page is concatenated,
    newlines and ASCII spaces are stripped, and the result is written to the
    ``json.txt`` object of the ``fenci`` bucket as a one-element JSON array
    of the form ``[{"id": 1, "name": <text>}]``.

    Args:
        event: Lambda invocation event; not read by this handler.
        context: Lambda runtime context; not read by this handler.

    Returns:
        A dict with the single key ``statusCode``.

    Raises:
        requests.HTTPError: If the PDF download returns an error status.
    """
    url = "http://static.cninfo.com.cn/finalpage/2022-02-08/1212324031.PDF"
    # A timeout keeps a stalled connection from hanging the invocation.
    response = requests.get(url, timeout=30)
    # Fail with a clear HTTP error instead of feeding an error page to the
    # PDF parser.
    response.raise_for_status()

    with pdfplumber.open(BytesIO(response.content)) as pdf:
        # extract_text() may yield None for a page without a text layer;
        # treat such pages as empty rather than crashing on concatenation.
        page_texts = [page.extract_text() or "" for page in pdf.pages]

    # Strip real newline characters and spaces (the earlier pattern "/n"
    # matched a literal slash followed by "n", not a line break).
    text = re.sub(r"[\n ]", "", "".join(page_texts))

    # ensure_ascii=False keeps non-ASCII text readable in the stored file.
    payload = json.dumps([{"id": 1, "name": text}], ensure_ascii=False)

    s3 = boto3.resource('s3')
    # Encode explicitly so the stored bytes are unambiguously UTF-8.
    s3.Object('fenci', 'json.txt').put(Body=payload.encode('utf-8'))

    # NOTE(review): "hello" looks like a placeholder; HTTP integrations
    # normally expect an integer status code here. Left as-is for callers.
    return {
        'statusCode': "hello",
    }
import json

import boto3


def lambda_handler(event, context):
    """Return the contents of the ``json.txt`` object in the ``fenci`` bucket.

    Args:
        event: Lambda invocation event; not read by this handler.
        context: Lambda runtime context; not read by this handler.

    Returns:
        A one-element list ``[{'id': 0, 'name': <text>}]`` where ``<text>``
        is the object's body decoded as UTF-8.

    Raises:
        UnicodeDecodeError: If the stored object is not valid UTF-8.
    """
    s3 = boto3.resource('s3')
    raw_body = s3.Object('fenci', 'json.txt').get()['Body'].read()
    # read() yields bytes, which the Lambda runtime cannot JSON-serialize
    # when marshalling the handler's return value; decode to str first.
    return [{
        'id': 0,
        'name': raw_body.decode('utf-8'),
    }]
原创文章,作者:,如若转载,请注明出处:https://blog.ytso.com/275337.html