From d02da321a16099a2378d576a7103256c5d6004dd Mon Sep 17 00:00:00 2001 From: Tom Date: Mon, 3 Mar 2025 21:18:46 +0000 Subject: [PATCH] Changed book status to smallint. Added media_type to series. Added 'Hanashi Media' regex resolver for searching. Removed 'Fiction' limitation when searching. Added update series to add new volumes. Fixed search when not all volumes would show up. --- backend/database.postgres.sql | 3 +- .../src/books/books.service.ts | 14 +- .../src/books/dto/create-book-status.dto.ts | 2 +- .../src/books/entities/book-status.entity.ts | 4 +- .../src/library/library.consumer.ts | 162 +++++++++++------- .../src/library/library.controller.ts | 67 ++++++-- .../src/library/library.service.ts | 12 +- .../contexts/google.search.context.ts | 16 +- .../providers/dto/book-search-result.dto.ts | 4 + .../src/providers/google/google.service.ts | 36 +++- .../dto/create-series-subscription-job.dto.ts | 4 + .../dto/create-series-subscription.dto.ts | 4 + .../src/series/dto/create-series.dto.ts | 4 + .../src/series/entities/series.entity.ts | 3 + 14 files changed, 250 insertions(+), 85 deletions(-) diff --git a/backend/database.postgres.sql b/backend/database.postgres.sql index 22f6e6b..ced7e4f 100644 --- a/backend/database.postgres.sql +++ b/backend/database.postgres.sql @@ -25,6 +25,7 @@ CREATE TABLE -- 3rd party id for this series. provider_series_id text, series_title text NOT NULL, + media_type text, -- 3rd party used to fetch the data for this series. 
provider varchar(12) NOT NULL, added_at timestamp default NULL, @@ -113,7 +114,7 @@ CREATE TABLE book_statuses ( user_id uuid, book_id uuid, - state varchar(12), + state smallint, added_at timestamp default NULL, modified_at timestamp default NULL, PRIMARY KEY (user_id, book_id), diff --git a/backend/nestjs-seshat-api/src/books/books.service.ts b/backend/nestjs-seshat-api/src/books/books.service.ts index fa66bfb..9a92ff9 100644 --- a/backend/nestjs-seshat-api/src/books/books.service.ts +++ b/backend/nestjs-seshat-api/src/books/books.service.ts @@ -9,6 +9,7 @@ import { CreateBookDto } from './dto/create-book.dto'; import { CreateBookOriginDto } from './dto/create-book-origin.dto'; import { CreateBookStatusDto } from './dto/create-book-status.dto'; import { DeleteBookStatusDto } from './dto/delete-book-status.dto'; +import { SeriesDto } from 'src/series/dto/series.dto'; @Injectable() export class BooksService { @@ -62,12 +63,21 @@ export class BooksService { }); } + async findBooksFromSeries(series: SeriesDto) { + return await this.bookRepository.find({ + where: { + providerSeriesId: series.providerSeriesId, + provider: series.provider, + } + }); + } + async findBookStatusesTrackedBy(userId: UUID): Promise { return await this.bookStatusRepository.createQueryBuilder('s') .select(['s.book_id', 's.user_id']) .where('s.user_id = :id', { id: userId }) .innerJoin('s.book', 'b') - .addSelect(['b.book_title', 'b.book_desc', 'b.book_volume', 'b.provider']) + .addSelect(['b.book_title', 'b.book_desc', 'b.book_volume', 'b.provider', 'b.providerSeriesId']) .getMany(); } @@ -101,7 +111,7 @@ export class BooksService { await this.bookStatusRepository.createQueryBuilder() .insert() .values(status) - .orUpdate(['user_id', 'book_id'], ['state', 'modified_at'], { skipUpdateIfNoValuesChanged: true }) + .orUpdate(['state', 'modified_at'], ['user_id', 'book_id'], { skipUpdateIfNoValuesChanged: true }) .execute(); } } diff --git 
a/backend/nestjs-seshat-api/src/books/dto/create-book-status.dto.ts b/backend/nestjs-seshat-api/src/books/dto/create-book-status.dto.ts index d5599ed..c53329e 100644 --- a/backend/nestjs-seshat-api/src/books/dto/create-book-status.dto.ts +++ b/backend/nestjs-seshat-api/src/books/dto/create-book-status.dto.ts @@ -13,7 +13,7 @@ export class CreateBookStatusDto { @IsString() @IsNotEmpty() - state: string; + state: number; modifiedAt: Date; } \ No newline at end of file diff --git a/backend/nestjs-seshat-api/src/books/entities/book-status.entity.ts b/backend/nestjs-seshat-api/src/books/entities/book-status.entity.ts index ec101b8..868e6d9 100644 --- a/backend/nestjs-seshat-api/src/books/entities/book-status.entity.ts +++ b/backend/nestjs-seshat-api/src/books/entities/book-status.entity.ts @@ -11,8 +11,8 @@ export class BookStatusEntity { @PrimaryColumn({ name: 'user_id', type: 'uuid' }) readonly userId: UUID; - @Column({ name: 'state', type: 'varchar' }) - state: string; + @Column({ name: 'state', type: 'smallint' }) + state: number; @Column({ name: 'added_at', type: 'timestamptz', nullable: false }) addedAt: Date diff --git a/backend/nestjs-seshat-api/src/library/library.consumer.ts b/backend/nestjs-seshat-api/src/library/library.consumer.ts index 75d01a3..5980290 100644 --- a/backend/nestjs-seshat-api/src/library/library.consumer.ts +++ b/backend/nestjs-seshat-api/src/library/library.consumer.ts @@ -26,69 +26,67 @@ export class LibraryConsumer extends WorkerHost { msg: 'Started task on queue.', }); - const series: CreateSeriesSubscriptionJobDto = job.data; + if (job.name == 'new_series') { + const series: CreateSeriesSubscriptionJobDto = job.data; + const books = await this.search(job, series, false); - let context = this.provider.generateSearchContext(series.provider, series.title) as GoogleSearchContext; - //context.intitle = series.title; - context.maxResults = '40'; - context.subject = 'Fiction'; - - // Search for the book(s) via the provider. 
- // Up until end of results or after 3 unhelpful pages of results. - let results = []; - let related = []; - let pageSearchedCount = 0; - let unhelpfulResultsCount = 0; - do { - pageSearchedCount += 1; - results = await this.provider.search(context); - const potential = results.filter(r => r.providerSeriesId == series.providerSeriesId || r.title == series.title); - if (potential.length > 0) { - related.push.apply(related, potential); - } else { - unhelpfulResultsCount += 1; + let counter = 0; + for (let book of books) { + try { + // Force the provider's series id to be set, so that we know which series this belongs. + book.result.providerSeriesId = series.providerSeriesId; + await this.library.addBook(book.result); + } catch (err) { + this.logger.error({ + class: LibraryConsumer.name, + method: this.process.name, + book: book.result, + score: book.score, + msg: 'Failed to add book in background during adding series.', + error: err, + }); + } finally { + counter++; + job.updateProgress(25 + 75 * counter / books.length); + } } - context = context.next(); - job.updateProgress(pageSearchedCount * 5); - } while (results.length >= 40 && unhelpfulResultsCount < 3); + } else if (job.name == 'update_series') { + const series: CreateSeriesSubscriptionJobDto = job.data; + const existingBooks = await this.library.getBooksFromSeries(series); + const existingVolumes = existingBooks.map(b => b.volume); + const books = await this.search(job, series, true); - // Sort & de-duplicate the entries received. 
- const books = related.map(book => this.toScore(book, series)) - .sort((a, b) => a.result.volume - b.result.volume || b.score - a.score) - .filter((_, index, arr) => index == 0 || arr[index - 1].result.volume != arr[index].result.volume); - job.updateProgress(25); + let counter = 0; + for (let book of books) { + if (existingVolumes.includes(book.result.volume)) { + continue; + } - this.logger.debug({ - class: LibraryConsumer.name, - method: this.process.name, - job: job, - msg: 'Finished searching for book entries.', - results: { - pages: pageSearchedCount, - related_entries: related.length, - volumes: books.length, - } - }); - - let counter = 0; - for (let book of books) { - try { - // Force the provider's series id to be set, so that we know which series this belongs. - book.result.providerSeriesId = series.providerSeriesId; - await this.library.addBook(book.result); - } catch (err) { - this.logger.error({ - class: LibraryConsumer.name, - method: this.process.name, - book: book.result, - score: book.score, - msg: 'Failed to add book in background.', - error: err, - }); - } finally { - counter++; - job.updateProgress(25 + 75 * counter / books.length); + try { + // Force the provider's series id to be set, so that we know which series this belongs. 
+ book.result.providerSeriesId = series.providerSeriesId; + await this.library.addBook(book.result); + } catch (err) { + this.logger.error({ + class: LibraryConsumer.name, + method: this.process.name, + book: book.result, + score: book.score, + msg: 'Failed to add book in background during series update.', + error: err, + }); + } finally { + counter++; + job.updateProgress(25 + 75 * counter / books.length); + } } + } else { + this.logger.warn({ + class: LibraryConsumer.name, + method: this.process.name, + job: job, + msg: 'Unknown job name found.', + }); } this.logger.info({ @@ -101,6 +99,54 @@ export class LibraryConsumer extends WorkerHost { return null; } + private async search(job: Job, series: CreateSeriesSubscriptionJobDto, newest: boolean): Promise<{ result: BookSearchResultDto, score: number }[]> { + let context = this.provider.generateSearchContext(series.provider, series.title) as GoogleSearchContext; + context.maxResults = '40'; + if (newest) { + context.orderBy = 'newest'; + } + + // Search for the book(s) via the provider. + // Up until end of results or after 3 unhelpful pages of results. + let results = []; + let related = []; + let pageSearchedCount = 0; + let unhelpfulResultsCount = 0; + do { + pageSearchedCount += 1; + results = await this.provider.search(context); + const potential = results.filter((r: BookSearchResultDto) => r.providerSeriesId == series.providerSeriesId || r.title == series.title && r.mediaType == series.mediaType); + if (potential.length > 0) { + related.push.apply(related, potential); + } else { + unhelpfulResultsCount += 1; + } + context = context.next(); + job.updateProgress(pageSearchedCount * 5); + } while (results.length >= context.maxResults && (!newest || unhelpfulResultsCount < 3)); + + // Sort & de-duplicate the entries received. 
+ const books = related.map(book => this.toScore(book, series)) + .sort((a, b) => a.result.volume - b.result.volume || b.score - a.score) + .filter((_, index, arr) => index == 0 || arr[index - 1].result.volume != arr[index].result.volume); + job.updateProgress(25); + + + this.logger.debug({ + class: LibraryConsumer.name, + method: this.search.name, + job: job, + msg: 'Finished searching for book entries.', + results: { + pages: pageSearchedCount, + related_entries: related.length, + volumes: books.length, + } + }); + + return books; + } + @OnQueueEvent('failed') onFailed(job: Job, err: Error) { this.logger.error({ diff --git a/backend/nestjs-seshat-api/src/library/library.controller.ts b/backend/nestjs-seshat-api/src/library/library.controller.ts index acd1475..177e998 100644 --- a/backend/nestjs-seshat-api/src/library/library.controller.ts +++ b/backend/nestjs-seshat-api/src/library/library.controller.ts @@ -1,5 +1,5 @@ import { InjectQueue } from '@nestjs/bullmq'; -import { Body, Controller, Delete, Get, Post, Put, Request, Res, UseGuards } from '@nestjs/common'; +import { Body, Controller, Delete, Get, Patch, Post, Put, Request, Res, UseGuards } from '@nestjs/common'; import { Response } from 'express'; import { Queue } from 'bullmq'; import { PinoLogger } from 'nestjs-pino'; @@ -49,6 +49,44 @@ export class LibraryController { provider: body.provider, providerSeriesId: body.providerSeriesId, title: body.title, + mediaType: body.mediaType, + }); + + return { + success: true, + }; + } catch (err) { + if (err instanceof QueryFailedError) { + if (err.driverError.code == '23505') { + // Subscription already exist. 
+ response.statusCode = 409; + return { + success: false, + error_message: 'Series subscription already exists.', + }; + } + } + + response.statusCode = 500; + return { + success: false, + error_message: 'Something went wrong.', + }; + } + } + + @Patch('series') + async updateSeries( + @Request() req, + @Body() body: CreateSeriesSubscriptionDto, + @Res({ passthrough: true }) response: Response, + ) { + try { + await this.library.updateSeries({ + provider: body.provider, + providerSeriesId: body.providerSeriesId, + title: body.title, + mediaType: body.mediaType, }); return { @@ -149,9 +187,18 @@ export class LibraryController { provider: body.provider, providerSeriesId: body.providerSeriesId, title: body.title, + mediaType: body.mediaType, }); } catch (err) { if (err instanceof QueryFailedError) { + this.logger.error({ + class: LibraryController.name, + method: this.createBook.name, + user: req.user, + msg: 'Failed to create a series for a book.', + error: err, + }); + // Ignore if the series already exist. if (err.driverError.code != '23505') { response.statusCode = 500; @@ -171,6 +218,14 @@ export class LibraryController { }; } catch (err) { if (err instanceof QueryFailedError) { + this.logger.error({ + class: LibraryController.name, + method: this.createBook.name, + user: req.user, + msg: 'Failed to create book.', + error: err, + }); + if (err.driverError.code == '23505') { // Book exists already. response.statusCode = 409; @@ -179,7 +234,7 @@ export class LibraryController { error_message: 'The book has already been added previously.', }; } else if (err.driverError.code == '23503') { - // Data dependency is missing. + // Series is missing. 
response.statusCode = 500; return { success: false, @@ -188,14 +243,6 @@ export class LibraryController { } } - this.logger.error({ - class: LibraryController.name, - method: this.createBook.name, - user: req.user, - msg: 'Failed to create book.', - error: err, - }); - response.statusCode = 500; return { success: false, diff --git a/backend/nestjs-seshat-api/src/library/library.service.ts b/backend/nestjs-seshat-api/src/library/library.service.ts index a8b99ce..c900b93 100644 --- a/backend/nestjs-seshat-api/src/library/library.service.ts +++ b/backend/nestjs-seshat-api/src/library/library.service.ts @@ -3,10 +3,10 @@ import { Injectable } from '@nestjs/common'; import { Queue } from 'bullmq'; import { PinoLogger } from 'nestjs-pino'; import { BooksService } from 'src/books/books.service'; -import { CreateBookDto } from 'src/books/dto/create-book.dto'; import { BookSearchResultDto } from 'src/providers/dto/book-search-result.dto'; import { CreateSeriesDto } from 'src/series/dto/create-series.dto'; import { SeriesSubscriptionDto } from 'src/series/dto/series-subscription.dto'; +import { SeriesDto } from 'src/series/dto/series.dto'; import { SeriesService } from 'src/series/series.service'; import { BookOriginType } from 'src/shared/enums/book_origin_type'; @@ -26,7 +26,7 @@ export class LibraryService { this.logger.debug({ class: LibraryService.name, method: this.addSubscription.name, - series: series.providerSeriesId, + series: series, msg: 'Series saved to database.', }); @@ -135,4 +135,12 @@ export class LibraryService { return bookId; } + + async updateSeries(series: CreateSeriesDto) { + return await this.jobs.add('update_series', series); + } + + async getBooksFromSeries(series: SeriesDto) { + return await this.books.findBooksFromSeries(series); + } } diff --git a/backend/nestjs-seshat-api/src/providers/contexts/google.search.context.ts b/backend/nestjs-seshat-api/src/providers/contexts/google.search.context.ts index 545a6af..615e032 100644 --- 
a/backend/nestjs-seshat-api/src/providers/contexts/google.search.context.ts +++ b/backend/nestjs-seshat-api/src/providers/contexts/google.search.context.ts @@ -7,7 +7,7 @@ export class GoogleSearchContext extends SearchContext { generateQueryParams() { - const filterParams = ['maxResults', 'startIndex']; + const filterParams = ['maxResults', 'startIndex', 'orderBy']; const searchParams = ['intitle', 'inauthor', 'inpublisher', 'subject', 'isbn']; const queryParams = filterParams @@ -21,7 +21,19 @@ export class GoogleSearchContext extends SearchContext { ...searchParams.map(p => this.params[p] ? p + ':"' + this.params[p] + '"' : ''), ].filter(p => p.length > 0).join(''); - return [queryParams, 'q=' + searchQueryParam].filter(q => q.length > 0).join('&'); + return [queryParams, 'q=' + searchQueryParam].filter(q => q.length > 2).join('&'); + } + + get orderBy(): 'newest' | 'relevant' { + return this.params['orderBy'] as 'newest' | 'relevant' ?? 'relevant'; + } + + set orderBy(value: 'newest' | 'relevant' | null) { + if (!value) { + delete this.params['orderBy']; + } else { + this.params['orderBy'] = value; + } } get maxResults(): number { diff --git a/backend/nestjs-seshat-api/src/providers/dto/book-search-result.dto.ts b/backend/nestjs-seshat-api/src/providers/dto/book-search-result.dto.ts index ca68879..5bcd0e4 100644 --- a/backend/nestjs-seshat-api/src/providers/dto/book-search-result.dto.ts +++ b/backend/nestjs-seshat-api/src/providers/dto/book-search-result.dto.ts @@ -48,6 +48,10 @@ export class BookSearchResultDto { @IsNotEmpty() language: string; + @IsString() + @IsOptional() + mediaType: string | null; + @IsArray() @IsString({ each: true }) categories: string[]; diff --git a/backend/nestjs-seshat-api/src/providers/google/google.service.ts b/backend/nestjs-seshat-api/src/providers/google/google.service.ts index e9e6040..e83e678 100644 --- a/backend/nestjs-seshat-api/src/providers/google/google.service.ts +++ 
b/backend/nestjs-seshat-api/src/providers/google/google.service.ts @@ -59,9 +59,10 @@ export class GoogleService { volume: item.volumeInfo.seriesInfo?.bookDisplayNumber ? parseInt(item.volumeInfo.seriesInfo?.bookDisplayNumber, 10) : undefined, publisher: item.volumeInfo.publisher, authors: item.volumeInfo.authors, - categories: item.volumeInfo.categories, + categories: item.volumeInfo.categories ?? [], + mediaType: null, maturityRating: item.volumeInfo.maturityRating, - industryIdentifiers: item.volumeInfo.industryIdentifiers ? Object.assign({}, ...item.volumeInfo.industryIdentifiers.map(i => ({ [i.type]: i.identifier }))) : [], + industryIdentifiers: item.volumeInfo.industryIdentifiers ? Object.assign({}, ...item.volumeInfo.industryIdentifiers.map(i => i.type == 'OTHER' ? { [i.identifier.split(':')[0]]: i.identifier.split(':')[1] } : { [i.type]: i.identifier })) : [], publishedAt: new Date(item.volumeInfo.publishedDate), language: item.volumeInfo.language, thumbnail: item.volumeInfo.imageLinks?.thumbnail, @@ -69,8 +70,7 @@ export class GoogleService { provider: 'google' } - let regex = this.getRegexByPublisher(result.publisher); - + const regex = this.getRegexByPublisher(result.publisher); const match = result.title.match(regex); if (match?.groups) { result.title = match.groups['title'].trim(); @@ -79,19 +79,41 @@ export class GoogleService { } } + if (match?.groups && 'media_type' in match.groups) { + result.mediaType = match.groups['media_type']; + } else if (result.categories.includes('Comics & Graphic Novels')) { + result.mediaType = 'Comics & Graphic Novels'; + } else if (result.categories.includes('Fiction') || result.categories.includes('Young Adult Fiction')) { + result.mediaType = 'Novel'; + } else { + result.mediaType = 'Book'; + } + + if (result.mediaType) { + if (result.mediaType.toLowerCase() == "light novel") { + result.mediaType = 'Light Novel'; + } else if (result.mediaType.toLowerCase() == 'manga') { + result.mediaType = 'Manga'; + } + } + return 
result; } private getRegexByPublisher(publisher: string): RegExp { switch (publisher) { case 'J-Novel Club': - return /(?<title>.+?):?\sVolume\s(?<volume>\d+)/i; + return /^(?<title>.+?):?\sVolume\s(?<volume>\d+)$/i; case 'Yen On': case 'Yen Press': case 'Yen Press LLC': - return /(?<title>.+?),?\sVol\.\s(?<volume>\d+)\s\((?<media_type>[\w\s]+)\)/; + return /^(?<title>.+?)(?:,?\sVol\.\s(?<volume>\d+))?\s\((?<media_type>[\w\s]+)\)$/; + case 'Hanashi Media': + return /^(?<title>.+?)\s\((?<media_type>[\w\s]+)\),?\sVol\.\s(?<volume>\d+)$/ + case 'Regin\'s Chronicles': + return /^(?<title>.+?)\s\((?<media_type>[\w\s]+)\)(?<subtitle>\:\s.+?)?$/ default: - return /(?<title>.+?)(?:,|:|\s\-)?\s(?:Vol(?:\.|ume)?)?\s(?<volume>\d+)/; + return /^(?<title>.+?)(?:,|:|\s\-)?\s(?:Vol(?:\.|ume)?)?\s(?<volume>\d+)$/; } } } diff --git a/backend/nestjs-seshat-api/src/series/dto/create-series-subscription-job.dto.ts b/backend/nestjs-seshat-api/src/series/dto/create-series-subscription-job.dto.ts index b2d9927..c5ee724 100644 --- a/backend/nestjs-seshat-api/src/series/dto/create-series-subscription-job.dto.ts +++ b/backend/nestjs-seshat-api/src/series/dto/create-series-subscription-job.dto.ts @@ -5,4 +5,8 @@ export class CreateSeriesSubscriptionJobDto extends SeriesSubscriptionDto { @IsString() @IsNotEmpty() title: string; + + @IsString() + @IsNotEmpty() + mediaType: string; } \ No newline at end of file diff --git a/backend/nestjs-seshat-api/src/series/dto/create-series-subscription.dto.ts b/backend/nestjs-seshat-api/src/series/dto/create-series-subscription.dto.ts index 7721d4f..92552f4 100644 --- a/backend/nestjs-seshat-api/src/series/dto/create-series-subscription.dto.ts +++ b/backend/nestjs-seshat-api/src/series/dto/create-series-subscription.dto.ts @@ -5,4 +5,8 @@ export class CreateSeriesSubscriptionDto extends SeriesDto { @IsString() @IsNotEmpty() title: string; + + @IsString() + @IsNotEmpty() + mediaType: string; } \ No newline at end of file diff --git 
a/backend/nestjs-seshat-api/src/series/dto/create-series.dto.ts b/backend/nestjs-seshat-api/src/series/dto/create-series.dto.ts index 635a0d9..416260c 100644 --- a/backend/nestjs-seshat-api/src/series/dto/create-series.dto.ts +++ b/backend/nestjs-seshat-api/src/series/dto/create-series.dto.ts @@ -5,4 +5,8 @@ export class CreateSeriesDto extends SeriesDto { @IsString() @IsNotEmpty() title: string; + + @IsString() + @IsNotEmpty() + mediaType: string; } \ No newline at end of file diff --git a/backend/nestjs-seshat-api/src/series/entities/series.entity.ts b/backend/nestjs-seshat-api/src/series/entities/series.entity.ts index 75efea2..30eca1e 100644 --- a/backend/nestjs-seshat-api/src/series/entities/series.entity.ts +++ b/backend/nestjs-seshat-api/src/series/entities/series.entity.ts @@ -14,6 +14,9 @@ export class SeriesEntity { @Column({ name: 'series_title', type: 'text', nullable: false }) title: string; + @Column({ name: 'media_type', type: 'text', nullable: true }) + mediaType: string; + @Column({ name: 'provider', type: 'text', nullable: false }) provider: string;