feat: scraping, database and tooling

This commit is contained in:
Riccardo
2023-11-30 17:20:44 +01:00
parent 6770080378
commit afa1b943e7
16 changed files with 4393 additions and 1 deletions

11
.env.example Normal file
View File

@@ -0,0 +1,11 @@
# Connection string read by Prisma (prisma/schema.prisma, env("DATABASE_URL")).
# docker-compose.yml overrides this value for the backend container.
DATABASE_URL=
# Credentials and database name for the postgres container; docker-compose.yml
# also uses them to build the backend's DATABASE_URL.
POSTGRES_USER=
POSTGRES_PASSWORD=
POSTGRES_DB=
# Page that src/index.ts hands to x-ray as the URL to scrape.
WEBSITE_URL=
# Selector passed to x-ray as the second argument of the outer call
# (presumably the scope that contains the list — confirm against the target page).
ANCHOR_ELEMENT=
# Selector passed to the nested x-ray call that collects the array of items.
ITERATIVE_CLASS=
# Selectors for the `name`, `areas` and `url` fields of each collected item.
NAME_CLASS=
AREAS_CLASS=
URL_CLASS=
# Selector passed to x-ray's paginate().
PAGINATE_CLASS=

22
.eslintrc.json Normal file
View File

@@ -0,0 +1,22 @@
{
"env": {
"browser": true,
"es2021": true
},
"extends": [
"next/core-web-vitals",
"eslint:recommended",
"plugin:@typescript-eslint/recommended",
"prettier"
],
"parser": "@typescript-eslint/parser",
"parserOptions": {
"ecmaVersion": "latest",
"sourceType": "module"
},
"plugins": ["@typescript-eslint"],
"rules": {
"@typescript-eslint/no-unused-vars": "error",
"@typescript-eslint/consistent-type-definitions": ["error", "type"]
}
}

4
.husky/commit-msg Executable file
View File

@@ -0,0 +1,4 @@
#!/usr/bin/env sh
# Git commit-msg hook: lint the commit message with commitlint.
. "$(dirname -- "$0")/_/husky.sh"

# $1 is the argument Git hands to this hook (the commit message file). It is
# quoted so a path containing spaces or glob characters stays a single argument.
npx --no-install commitlint --edit "$1"

7
.husky/pre-commit Executable file
View File

@@ -0,0 +1,7 @@
#!/usr/bin/env sh
# Git pre-commit hook: runs the project's audit, format, lint and typecheck
# scripts, in that order, before a commit is created.
. "$(dirname -- "$0")/_/husky.sh"
# Dependency audit (yarn's built-in command).
yarn audit
# package.json "format": prettier --write over src/**/*.ts.
yarn format
# package.json "lint": eslint over .ts files with --fix.
yarn lint
# package.json "typecheck": tsc --noEmit.
yarn typecheck
# NOTE(review): `format` and `lint` rewrite files in the working tree but nothing
# here re-stages them, and the lint-staged config in package.json is not invoked
# by this hook — confirm whether that is intended.

9
.prettierrc Normal file
View File

@@ -0,0 +1,9 @@
{
"semi": true,
"trailingComma": "none",
"singleQuote": true,
"printWidth": 80,
"jsxSingleQuote": true,
"tabWidth": 2,
"arrowParens": "avoid"
}

12
Dockerfile Normal file
View File

@@ -0,0 +1,12 @@
# Image for the scraper. At container start it generates the Prisma client,
# applies pending migrations and then runs the app.
FROM node:18 AS builder
WORKDIR /app
# Copy only the manifests first so the dependency layer is reused until they change.
COPY package*.json yarn.lock ./
# devDependencies must be installed: `yarn start` runs ts-node and the `prepare`
# script runs husky, and both are listed under devDependencies in package.json.
# Installing with --production would leave the image unable to start the app.
RUN yarn install --frozen-lockfile
COPY . .
EXPOSE 3000
CMD ["bash", "-c", "yarn db:generate && yarn db:migrate && yarn start"]

View File

@@ -1 +1,71 @@
# xray-scraper # xray-scrap-test
## Setup
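Copy `.env.example` to `.env` and fill in the values, then run docker-compose to start the scraper and the database (the database is exposed on port 5432 on localhost, and the credentials are the ones set in `.env`)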
```bash
docker-compose up --build
```
## Commands
Run in development mode
```bash
yarn dev
```
Run in production mode
```bash
yarn start
```
Lint the code
```bash
yarn lint
```
Type check the code
```bash
yarn typecheck
```
Format the code
```bash
yarn format
```
Install Git hooks
```bash
yarn prepare
```
Run the migrations
```bash
yarn db:migrate
```
Add a new migration
```bash
yarn db:add-migration
```
Generate the prisma client
```bash
yarn db:generate
```
Wipe the database
```bash
yarn db:reset
```

1
commitlint.config.ts Normal file
View File

@@ -0,0 +1 @@
// commitlint configuration: inherit every rule from the
// @commitlint/config-conventional preset; no project-specific overrides.
module.exports = { extends: ['@commitlint/config-conventional'] };

42
docker-compose.yml Normal file
View File

@@ -0,0 +1,42 @@
version: '3.8'
services:
  # Scraper container, built from the Dockerfile in this directory.
  backend:
    build:
      context: ./
      dockerfile: Dockerfile
    image: backend
    container_name: backend
    # Quoted on purpose: a bare `no` is read as a YAML boolean, while Compose
    # expects the string "no" for the restart policy.
    restart: 'no'
    ports:
      - '3000:3000'
    environment:
      - PORT=3000
      # Points Prisma at the `postgres` service below rather than at localhost.
      - DATABASE_URL=postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB}
      - POSTGRES_USER=${POSTGRES_USER}
      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
      - POSTGRES_DB=${POSTGRES_DB}
      # Scraper settings read by src/index.ts.
      - WEBSITE_URL=${WEBSITE_URL}
      - ANCHOR_ELEMENT=${ANCHOR_ELEMENT}
      - ITERATIVE_CLASS=${ITERATIVE_CLASS}
      - NAME_CLASS=${NAME_CLASS}
      - AREAS_CLASS=${AREAS_CLASS}
      - URL_CLASS=${URL_CLASS}
      - PAGINATE_CLASS=${PAGINATE_CLASS}
    depends_on:
      # Short form: controls start order only. It does not wait for PostgreSQL
      # to accept connections before the backend runs its migrations.
      - postgres
  # Database container; data is kept in the named volume `pgdata`.
  postgres:
    image: postgres:latest
    container_name: postgres
    restart: unless-stopped
    environment:
      POSTGRES_USER: ${POSTGRES_USER}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
      POSTGRES_DB: ${POSTGRES_DB}
    ports:
      - '5432:5432'
    volumes:
      - pgdata:/var/lib/postgresql/data
volumes:
  pgdata:

53
package.json Normal file
View File

@@ -0,0 +1,53 @@
{
"name": "next-newsletter",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "nodemon --watch 'src/**/*.ts' --exec 'ts-node' src/index.ts",
"start": "ts-node src/index.ts",
"lint": "eslint --ext .ts . --fix",
"typecheck": "tsc --noEmit",
"format": "prettier --config .prettierrc 'src/**/*.ts' --write",
"prepare": "husky install",
"db:add-migration": "npx prisma migrate dev",
"db:migrate": "prisma migrate deploy",
"db:generate": "prisma generate",
"db:reset": "prisma migrate reset --force"
},
"dependencies": {
"@prisma/client": "^5.6.0",
"prisma": "^5.6.0",
"x-ray": "^2.3.4",
"zod": "^3.22.4",
"zod-validation-error": "^1.5.0"
},
"devDependencies": {
"@commitlint/cli": "^17.6.6",
"@commitlint/config-conventional": "^17.6.6",
"@types/node": "^20",
"@types/x-ray": "^2.3.6",
"@typescript-eslint/eslint-plugin": "^6.0.0",
"@typescript-eslint/parser": "^6.0.0",
"eslint": "^8",
"eslint-config-next": "14.0.3",
"eslint-config-prettier": "^8.8.0",
"husky": "^8.0.0",
"lint-staged": "^13.2.3",
"nodemon": "^3.0.1",
"prettier": "^3.0.0",
"ts-node": "^10.9.1",
"typescript": "^5.1.6"
},
"resolutions": {
"nth-check": "^2.0.1",
"debug": "^4.3.1"
},
"lint-staged": {
"*.ts": [
"eslint --quiet --fix"
],
"*.{json,ts}": [
"prettier --write --ignore-unknown"
]
}
}

View File

@@ -0,0 +1,13 @@
-- CreateTable
CREATE TABLE "Record" (
"id" TEXT NOT NULL,
"datetime" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"name" TEXT NOT NULL,
"areas" TEXT,
"url" TEXT NOT NULL,
CONSTRAINT "Record_pkey" PRIMARY KEY ("id")
);
-- CreateIndex
CREATE UNIQUE INDEX "Record_url_key" ON "Record"("url");

View File

@@ -0,0 +1,3 @@
# Please do not edit this file manually
# It should be added in your version-control system (i.e. Git)
provider = "postgresql"

19
prisma/schema.prisma Normal file
View File

@@ -0,0 +1,19 @@
// This is your Prisma schema file,
// learn more about it in the docs: https://pris.ly/d/prisma-schema
// Generates the JavaScript/TypeScript client that src/index.ts imports from
// '@prisma/client'.
generator client {
provider = "prisma-client-js"
}
// PostgreSQL datasource; the connection string comes from the DATABASE_URL
// environment variable.
datasource db {
provider = "postgresql"
url = env("DATABASE_URL")
}
// One scraped entry. src/index.ts upserts rows using `url` as the lookup key.
model Record {
// Primary key, defaulted to a generated UUID.
id String @id @default(uuid())
// Defaults to the current time when the row is created. There is no
// @updatedAt, so an upsert that takes the update path leaves it unchanged.
datetime DateTime @default(now())
name String
// Optional: the scraper may not find a value for it.
areas String?
// Unique, which is what allows it to be used in the upsert `where` clause.
url String @unique
}

73
src/index.ts Normal file
View File

@@ -0,0 +1,73 @@
import { PrismaClient } from '@prisma/client';
import Xray from 'x-ray';
import { z } from 'zod';
import { fromZodError } from 'zod-validation-error';
// Prisma client used for every database write in this script.
const prisma = new PrismaClient();
// x-ray scraper instance.
const x = Xray();
// Fail fast when any scraper setting is missing or empty. Each variable is
// tested through a direct `process.env.NAME` read; the calls further down pass
// the same expressions where a string is required, so keep this guard in this
// form (NOTE(review): changing it may lose the type narrowing under `strict`).
if (
!process.env.WEBSITE_URL ||
!process.env.ANCHOR_ELEMENT ||
!process.env.ITERATIVE_CLASS ||
!process.env.NAME_CLASS ||
!process.env.AREAS_CLASS ||
!process.env.URL_CLASS ||
!process.env.PAGINATE_CLASS
) {
throw new Error('Not all environment variables are defined');
}
// Shape of a single scraped item: `name` and `url` are required strings,
// `areas` may be absent.
const Record = z.object({
name: z.string(),
areas: z.string().optional(),
url: z.string()
});
// Schema for the whole scrape result: an array of items.
const Records = z.array(Record);
// Scrape WEBSITE_URL and persist the result.
//
// Inside ANCHOR_ELEMENT, every ITERATIVE_CLASS match is collected as a
// { name, areas, url } object, with PAGINATE_CLASS handed to x-ray's paginate().
// The callback validates the scraped items against `Records`, upserts each one
// into the `Record` table keyed on `url`, and logs the cleaned names.
x(process.env.WEBSITE_URL, process.env.ANCHOR_ELEMENT, {
  items: x(process.env.ITERATIVE_CLASS, [
    {
      name: process.env.NAME_CLASS,
      areas: process.env.AREAS_CLASS,
      url: process.env.URL_CLASS
    }
  ]).paginate(process.env.PAGINATE_CLASS)
})(async (err, data) => {
  if (err) {
    return console.log(err);
  }

  // `data?.items` so that a missing result is reported as a validation error
  // instead of throwing a TypeError here.
  const safeData = Records.safeParse(data?.items);
  if (!safeData.success) {
    // Format the schema failure itself; `err` is always falsy on this path.
    const validationError = fromZodError(safeData.error);
    console.log(validationError);
    return;
  }

  // Strip tabs and newlines from each name once, and reuse the cleaned value
  // for both the database write and the summary log.
  const records = safeData.data.map(item => ({
    ...item,
    name: item.name.replace(/\t|\n/g, '')
  }));

  try {
    await Promise.all(
      records.map(({ name, areas, url }) =>
        prisma.record.upsert({
          where: { url },
          create: { name, areas, url },
          update: { name, areas }
        })
      )
    );
  } catch (dbError) {
    // Handle the failure here rather than letting it escape the async callback
    // as an unhandled rejection; keep a non-zero exit status so it stays visible.
    console.log(dbError);
    process.exitCode = 1;
    return;
  } finally {
    // Release the database connection once the writes have settled.
    await prisma.$disconnect();
  }

  const names = records.map(record => record.name);
  console.log(names, `Found ${names.length} records.`);
});

10
tsconfig.json Normal file
View File

@@ -0,0 +1,10 @@
{
"compilerOptions": {
"target": "ES2018",
"module": "CommonJS",
"esModuleInterop": true,
"strict": true
},
"include": ["src/**/*.ts"],
"exclude": ["node_modules"]
}

4043
yarn.lock Normal file

File diff suppressed because it is too large Load Diff